From c79beb54777ecb834420671378fad6e0716657df Mon Sep 17 00:00:00 2001
From: Tomasz Stanczyk <stanior666@gmail.com>
Date: Fri, 6 Feb 2026 16:37:37 +0100
Subject: [PATCH 01/14] Very first runnable BoT-SORT. Implement camera motion
 compensation with orb.

---
 trackers/core/botsort/__init__.py           |   8 +
 trackers/core/botsort/cmc.py                | 181 ++++++++++++
 trackers/core/botsort/kalman_box_tracker.py | 146 ++++++++++
 trackers/core/botsort/tracker.py            | 303 ++++++++++++++++++++
 4 files changed, 638 insertions(+)
 create mode 100644 trackers/core/botsort/__init__.py
 create mode 100644 trackers/core/botsort/cmc.py
 create mode 100644 trackers/core/botsort/kalman_box_tracker.py
 create mode 100644 trackers/core/botsort/tracker.py

diff --git a/trackers/core/botsort/__init__.py b/trackers/core/botsort/__init__.py
new file mode 100644
index 00000000..e0bc8c7c
--- /dev/null
+++ b/trackers/core/botsort/__init__.py
@@ -0,0 +1,8 @@
+# ------------------------------------------------------------------------
+# Trackers
+# Copyright (c) 2026 Roboflow. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+from .tracker import BoTSORTTracker
+
+__all__ = ["BoTSORTTracker"]
diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
new file mode 100644
index 00000000..838eb356
--- /dev/null
+++ b/trackers/core/botsort/cmc.py
@@ -0,0 +1,181 @@
+# ------------------------------------------------------------------------
+# Trackers
+# Copyright (c) 2026 Roboflow. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional
+
+import copy
+import numpy as np
+import cv2
+
+
+@dataclass
+class CMCConfig:
+    downscale: int = 2
+    fast_threshold: int = 20
+
+    # Affine estimation
+    ransac_reproj_threshold: float = 3.0
+
+    # Filtering matches by spatial displacement (fraction of image size)
+    max_spatial_distance_frac: float = 0.25
+
+    # Keep features from central ROI (avoid borders)
+    roi_min_frac: float = 0.02
+    roi_max_frac: float = 0.98
+
+
+class CMC:
+    def __init__(self, cfg: Optional[CMCConfig] = None) -> None:
+        self.cfg = cfg or CMCConfig()
+        self.downscale = max(1, int(self.cfg.downscale))
+
+        self.detector = cv2.FastFeatureDetector_create(self.cfg.fast_threshold)
+        self.extractor = cv2.ORB_create()
+        self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
+
+        self._initialized = False
+        self._prev_kps = None
+        self._prev_desc: Optional[np.ndarray] = None
+
+    def reset(self) -> None:
+        self._initialized = False
+        self._prev_kps = None
+        self._prev_desc = None
+
+    def estimate(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
+        if frame_bgr is None:
+            return np.eye(2, 3, dtype=np.float32)
+
+        H_img, W_img = frame_bgr.shape[:2]
+        gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
+
+        # Downscale for speed / robustness
+        if self.downscale > 1:
+            gray = cv2.resize(gray, (W_img // self.downscale, H_img // self.downscale))
+        H, W = gray.shape[:2]
+
+        # Build mask: central ROI + remove detections (background features)
+        mask = np.zeros_like(gray, dtype=np.uint8)
+        y0 = int(self.cfg.roi_min_frac * H)
+        y1 = int(self.cfg.roi_max_frac * H)
+        x0 = int(self.cfg.roi_min_frac * W)
+        x1 = int(self.cfg.roi_max_frac * W)
+        mask[y0:y1, x0:x1] = 255
+
+        if dets_xyxy is not None and len(dets_xyxy) > 0:
+            dets = np.asarray(dets_xyxy, dtype=np.float32) / float(self.downscale)
+            dets = dets.astype(np.int32)
+            dets[:, 0] = np.clip(dets[:, 0], 0, W - 1)
+            dets[:, 2] = np.clip(dets[:, 2], 0, W - 1)
+            dets[:, 1] = np.clip(dets[:, 1], 0, H - 1)
+            dets[:, 3] = np.clip(dets[:, 3], 0, H - 1)
+            for x1b, y1b, x2b, y2b in dets:
+                if x2b > x1b and y2b > y1b:
+                    mask[y1b:y2b, x1b:x2b] = 0
+
+        # Detect + describe
+        kps = self.detector.detect(gray, mask)
+        kps, desc = self.extractor.compute(gray, kps)
+
+        H_aff = np.eye(2, 3, dtype=np.float32)
+
+        # First frame: only initialize
+        if not self._initialized:
+            self._prev_kps = copy.copy(kps)
+            self._prev_desc = None if desc is None else copy.copy(desc)
+            self._initialized = True
+            return H_aff
+
+        # If missing descriptors
+        if self._prev_desc is None or desc is None or len(desc) == 0:
+            self._prev_kps = copy.copy(kps)
+            self._prev_desc = None if desc is None else copy.copy(desc)
+            return H_aff
+
+        # KNN match (k=2) + ratio test
+        knn = self.matcher.knnMatch(self._prev_desc, desc, k=2)
+        if len(knn) == 0:
+            self._prev_kps = copy.copy(kps)
+            self._prev_desc = copy.copy(desc)
+            return H_aff
+
+        max_spatial = self.cfg.max_spatial_distance_frac * np.array([W, H], dtype=np.float32)
+
+        prev_pts = []
+        curr_pts = []
+        spatial = []
+
+        for pair in knn:
+            if len(pair) < 2:
+                continue
+            m, n = pair
+            if m.distance < 0.9 * n.distance:
+                p_prev = np.array(self._prev_kps[m.queryIdx].pt, dtype=np.float32)
+                p_curr = np.array(kps[m.trainIdx].pt, dtype=np.float32)
+                d = p_prev - p_curr
+                if (abs(d[0]) < max_spatial[0]) and (abs(d[1]) < max_spatial[1]):
+                    spatial.append(d)
+                    prev_pts.append(p_prev)
+                    curr_pts.append(p_curr)
+
+        if len(prev_pts) >= 5:
+            spatial = np.asarray(spatial, dtype=np.float32)
+            mean = spatial.mean(axis=0)
+            std = spatial.std(axis=0) + 1e-6
+            inl = np.logical_and(
+                np.abs(spatial[:, 0] - mean[0]) < 2.5 * std[0],
+                np.abs(spatial[:, 1] - mean[1]) < 2.5 * std[1],
+            )
+            prev_pts_np = np.asarray(prev_pts, dtype=np.float32)[inl]
+            curr_pts_np = np.asarray(curr_pts, dtype=np.float32)[inl]
+
+            if len(prev_pts_np) >= 5:
+                H_est, _ = cv2.estimateAffinePartial2D(
+                    prev_pts_np,
+                    curr_pts_np,
+                    method=cv2.RANSAC,
+                    ransacReprojThreshold=self.cfg.ransac_reproj_threshold,
+                )
+                if H_est is not None:
+                    H_aff = H_est.astype(np.float32)
+                    if self.downscale > 1:
+                        H_aff[0, 2] *= self.downscale
+                        H_aff[1, 2] *= self.downscale
+
+        # Update prev
+        self._prev_kps = copy.copy(kps)
+        self._prev_desc = copy.copy(desc)
+
+        return H_aff
+
+    @staticmethod
+    def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
+        if H is None or len(tracks) == 0:
+            return
+
+        H = H.astype(np.float32)
+        R = H[:2, :2]
+        t = H[:2, 2:3]  # (2,1)
+
+        # A4 maps [x1,y1,x2,y2]
+        A4 = np.zeros((4, 4), dtype=np.float32)
+        A4[0:2, 0:2] = R
+        A4[2:4, 2:4] = R
+
+        # A8 maps state (pos and vel blocks)
+        A8 = np.zeros((8, 8), dtype=np.float32)
+        A8[0:4, 0:4] = A4
+        A8[4:8, 4:8] = A4
+
+        trans4 = np.array([t[0, 0], t[1, 0], t[0, 0], t[1, 0]], dtype=np.float32).reshape(4, 1)
+
+        for trk in tracks:
+            trk.state = (A8 @ trk.state).astype(np.float32)
+            trk.state[0:4] += trans4
+            trk.P = (A8 @ trk.P @ A8.T).astype(np.float32)
diff --git a/trackers/core/botsort/kalman_box_tracker.py b/trackers/core/botsort/kalman_box_tracker.py
new file mode 100644
index 00000000..dc19df67
--- /dev/null
+++ b/trackers/core/botsort/kalman_box_tracker.py
@@ -0,0 +1,146 @@
+# ------------------------------------------------------------------------
+# Trackers
+# Copyright (c) 2026 Roboflow. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+
+import numpy as np
+
+class BoTSORTKalmanBoxTracker:
+    """
+    The `BoTSORTKalmanBoxTracker` class represents the internals of a single
+    tracked object (bounding box), with a Kalman filter to predict and update
+    its position.
+
+    Attributes:
+        tracker_id: Unique identifier for the tracker.
+        number_of_successful_updates: Number of times the object has been
+            updated successfully.
+        time_since_update: Number of frames since the last update.
+        state: State vector of the bounding box.
+        F: State transition matrix.
+        H: Measurement matrix.
+        Q: Process noise covariance matrix.
+        R: Measurement noise covariance matrix.
+        P: Error covariance matrix.
+        count_id: Class variable to assign unique IDs to each tracker.
+
+    Args:
+        bbox: Initial bounding box in the form [x1, y1, x2, y2].
+    """
+
+    count_id = 0
+
+    @classmethod
+    def get_next_tracker_id(cls) -> int:
+        """
+        Hand out a fresh tracker ID and advance the class-wide counter.
+
+        Returns:
+            The value `count_id` held before it was incremented.
+        """
+        issued = cls.count_id
+        cls.count_id = issued + 1
+        return issued
+
+    def __init__(self, bbox: np.ndarray):
+        # A new track is anonymous: -1 is a placeholder that is replaced by
+        # a real ID once the track is considered mature.
+        self.tracker_id = -1
+
+        # The detection that creates the track counts as its first hit.
+        self.number_of_successful_updates = 1
+        # Frames elapsed since a detection last updated this track.
+        self.time_since_update = 0
+
+        # State is an (8, 1) column vector:
+        # (x1, y1, x2, y2, vx1, vy1, vx2, vy2), i.e. the two box corners
+        # followed by one velocity per coordinate.
+        corners = (bbox[0], bbox[1], bbox[2], bbox[3])
+        self.state = np.zeros((8, 1), dtype=np.float32)
+
+        # Seed the position rows from the first detection; the velocity
+        # rows start at zero.
+        for row, value in enumerate(corners):
+            self.state[row] = value
+
+        # Build F, H, Q, R and P; they are set from constants and do not
+        # depend on the initial box.
+        self._initialize_kalman_filter()
+
+    def _initialize_kalman_filter(self) -> None:
+        """
+        Create the constant-velocity Kalman matrices F, H, Q, R and P.
+        """
+        f32 = np.float32
+
+        # Error covariance (P): identity, same uncertainty on every state.
+        self.P = np.eye(8, dtype=f32)
+        # Measurement noise (R) and process noise (Q): isotropic, fixed.
+        self.R = np.eye(4, dtype=f32) * 0.1
+        self.Q = np.eye(8, dtype=f32) * 0.01
+
+        # Measurement matrix (H), 4x8: the detector observes x1, y1, x2, y2
+        # directly and never observes the velocities.
+        self.H = np.eye(4, 8, dtype=f32)
+
+        # State transition (F), 8x8: identity plus an upper-right 4x4
+        # identity block, so each position row becomes
+        # position + velocity after one step.
+        transition = np.eye(8, dtype=f32)
+        transition[:4, 4:] = np.eye(4, dtype=f32)
+        self.F = transition
+
+    def predict(self) -> None:
+        """
+        Advance the track by one frame using the constant-velocity model.
+        """
+        transition = self.F
+        # A prediction step means one more frame without a measurement.
+        self.time_since_update += 1
+
+        # Propagate the state, then the covariance plus process noise.
+        self.state = transition @ self.state
+        self.P = transition @ self.P @ transition.T + self.Q
+
+    def update(self, bbox: np.ndarray) -> None:
+        """
+        Correct the state with a detection matched to this track.
+
+        Args:
+            bbox: Detected box [x1, y1, x2, y2]; any 4-element array-like.
+        """
+        # Convert first so a malformed box raises before any bookkeeping
+        # changes, and cast to float32 so float64 detections do not
+        # silently promote the float32 state.
+        measurement = np.asarray(bbox, dtype=np.float32).reshape((4, 1))
+
+        self.time_since_update = 0
+        self.number_of_successful_updates += 1
+
+        # Kalman gain from the innovation covariance S.
+        S = self.H @ self.P @ self.H.T + self.R
+        K = self.P @ self.H.T @ np.linalg.inv(S)
+
+        # Blend the residual (measurement minus predicted box) into the
+        # state, then shrink the covariance accordingly.
+        self.state = self.state + K @ (measurement - self.H @ self.state)
+        identity_matrix = np.eye(8, dtype=np.float32)
+        self.P = (identity_matrix - K @ self.H) @ self.P
+
+    def get_state_bbox(self) -> np.ndarray:
+        """
+        Extract the current box estimate from the Kalman state.
+
+        Only the four position rows are read; the velocity rows are
+        ignored.
+
+        Returns:
+            A new 1-D float array [x1, y1, x2, y2]. It is a copy, so
+            modifying it does not change the tracker state.
+        """
+        # np.array (not asarray) always copies, matching the fresh array
+        # the caller expects; reshape(-1) flattens the column slice to
+        # a flat length-4 vector.
+        corners = np.array(self.state[:4], dtype=float)
+        return corners.reshape(-1)
diff --git a/trackers/core/botsort/tracker.py b/trackers/core/botsort/tracker.py
new file mode 100644
index 00000000..c9ebcdba
--- /dev/null
+++ b/trackers/core/botsort/tracker.py
@@ -0,0 +1,303 @@
+# ------------------------------------------------------------------------
+# Trackers
+# Copyright (c) 2026 Roboflow. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+
+from copy import deepcopy
+from typing import cast
+
+import numpy as np
+import supervision as sv
+from scipy.optimize import linear_sum_assignment
+
+from trackers.core.base import BaseTracker
+from trackers.core.botsort.kalman_box_tracker import BoTSORTKalmanBoxTracker
+from trackers.utils.sort_utils import (
+    get_alive_trackers,
+    get_iou_matrix,
+)
+from trackers.core.botsort.cmc import CMC, CMCConfig
+
+class BoTSORTTracker(BaseTracker):
+    def __init__(
+        self,
+        lost_track_buffer: int = 30,
+        frame_rate: float = 30.0,
+        track_activation_threshold: float = 0.7,
+        minimum_consecutive_frames: int = 2,
+        minimum_iou_threshold: float = 0.1,
+        high_conf_det_threshold: float = 0.6,
+        enable_cmc: bool = True
+
+    ) -> None:
+        # Calculate maximum frames without update based on lost_track_buffer and
+        # frame_rate. This scales the buffer based on the frame rate to ensure
+        # consistent time-based tracking across different frame rates.
+        self.maximum_frames_without_update = int(frame_rate / 30.0 * lost_track_buffer)
+        self.minimum_consecutive_frames = minimum_consecutive_frames
+        self.minimum_iou_threshold = minimum_iou_threshold
+        self.track_activation_threshold = track_activation_threshold
+        self.high_conf_det_threshold = high_conf_det_threshold
+        self.tracks: list[BoTSORTKalmanBoxTracker] = []
+
+        self.enable_cmc = enable_cmc
+        self.cmc = CMC(CMCConfig()) if enable_cmc else None
+
+    def _update_detections(
+        self,
+        tracks: list[BoTSORTKalmanBoxTracker],
+        detections: sv.Detections,
+        updated_detections: list[sv.Detections],
+        matched_indices: list[tuple[int, int]],
+    ) -> list[sv.Detections]:
+        # Update matched tracks with assigned detections.
+        det_bboxes = detections.xyxy
+        for row, col in matched_indices:
+            t = tracks[row]
+            t.update(det_bboxes[col])
+            # If tracker is mature but still has ID -1, assign a new ID
+            if (
+                t.number_of_successful_updates >= self.minimum_consecutive_frames
+                and t.tracker_id == -1
+            ):  # Check maturity before assigning ID
+                t.tracker_id = BoTSORTKalmanBoxTracker.get_next_tracker_id()
+
+            new_det = deepcopy(detections[col : col + 1])
+            # Add cast to clarify type for mypy
+            new_det = cast(sv.Detections, new_det)  # ADDED cast
+            new_det.tracker_id = np.array([t.tracker_id])
+            updated_detections.append(new_det)
+        return updated_detections
+
+    def update(
+        self,
+        detections: sv.Detections,
+        frame: np.ndarray,
+    ) -> sv.Detections:
+        if len(self.tracks) == 0 and len(detections) == 0:
+            detections.tracker_id = np.array([], dtype=int)
+            return detections
+        updated_detections: list[
+            sv.Detections
+        ] = []  # List for returning the updated detections with its new assigned track id # noqa: E501
+
+        # Predict new locations for existing tracks
+        for tracker in self.tracks:
+            tracker.predict()
+        # Assign a default tracker_id with the correct shape
+        detections.tracker_id = -np.ones(len(detections))
+        # Split into high confidence boxes and lower based on self.high_conf_det_threshold # noqa: E501
+        high_prob_detections, low_prob_detections = (
+            self._get_high_and_low_probability_detections(detections)
+        )
+
+        # CMC (ORB) apply to all predicted tracks before association
+        if self.enable_cmc and self.cmc is not None and frame is not None:
+            mask_boxes = high_prob_detections.xyxy if len(high_prob_detections) > 0 else None
+            H = self.cmc.estimate(frame, mask_boxes)
+            self.cmc.apply_to_tracks(self.tracks, H)
+
+        # Step 1: first association, with high confidence boxes
+        matched_indices, unmatched_tracks, unmatched_high_prob_detections = (
+            self._similarity_step(
+                high_prob_detections,
+                self.tracks,
+            )
+        )
+
+        # Update matched tracks with high-confidence detections
+        self._update_detections(
+            self.tracks,
+            high_prob_detections,
+            updated_detections,
+            matched_indices,
+        )
+
+        remaining_tracks = [self.tracks[i] for i in unmatched_tracks]
+
+        # Step 2: associate Low Probability detections with remaining tracks
+        matched_indices, unmatched_tracks, unmatched_detections = self._similarity_step(
+            low_prob_detections, remaining_tracks
+        )
+
+        # Update matched tracks with low-confidence detections
+        self._update_detections(
+            remaining_tracks,
+            low_prob_detections,
+            updated_detections,
+            matched_indices,
+        )
+
+        # Add unmatched low prob predictions to updated predictions
+        for det_index in unmatched_detections:
+            new_det = deepcopy(low_prob_detections[det_index : det_index + 1])
+
+            new_det.tracker_id = np.array([-1])
+            updated_detections.append(new_det)
+
+        self._spawn_new_trackers(
+            high_prob_detections,
+            high_prob_detections.xyxy,
+            unmatched_high_prob_detections,
+            updated_detections,
+        )
+
+        # Kill lost tracks
+        self.tracks = get_alive_trackers(
+            trackers=self.tracks,
+            maximum_frames_without_update=self.maximum_frames_without_update,
+            minimum_consecutive_frames=self.minimum_consecutive_frames,
+        )
+        final_updated_detections: sv.Detections = sv.Detections.merge(
+            updated_detections
+        )
+        if len(final_updated_detections) == 0:
+            final_updated_detections.tracker_id = np.array([], dtype=int)
+        return final_updated_detections
+
+    def _get_high_and_low_probability_detections(
+        self, detections: sv.Detections
+    ) -> tuple[sv.Detections, sv.Detections]:
+        """
+        Splits the input detections into high-confidence and low-confidence sets
+        based on the `self.high_conf_det_threshold`.
+
+        Args:
+            detections: The input detections with confidence scores.
+
+        Returns:
+            A tuple containing two `sv.Detections objects`: the first for
+                high-confidence detections `(confidence >= threshold)` and the second
+                for low-confidence detections `(confidence < threshold)`.
+        """
+        # Check if confidence scores exist before comparing
+        if detections.confidence is not None:
+            # Perform element-wise comparison if confidence is a NumPy array
+            condition = detections.confidence >= self.high_conf_det_threshold
+        else:
+            # If no confidence scores, no detections meet the threshold
+            # Create a boolean array of False with the same length as detections
+            condition = np.zeros(len(detections), dtype=bool)
+
+        high_confidence = detections[condition]
+        low_confidence = detections[np.logical_not(condition)]
+        return high_confidence, low_confidence
+
+    def _get_associated_indices(
+        self,
+        similarity_matrix: np.ndarray,
+        min_similarity_thresh: float,
+    ) -> tuple[list[tuple[int, int]], set[int], set[int]]:
+        """
+        Associate detections to tracks based on Similarity (IoU) using the
+        Jonker-Volgenant algorithm approach with no initialization instead of the
+        Hungarian algorithm as mentioned in the SORT paper, but it solves the
+        assignment problem in an optimal way.
+
+        Args:
+            similarity_matrix: Similarity matrix between tracks (rows) and detections (columns).
+            min_similarity_thresh: Minimum similarity threshold for a valid match.
+
+        Returns:
+            Matched indices (list of (tracker_idx, detection_idx)), indices of
+                unmatched tracks, indices of unmatched detections.
+        """  # noqa: E501
+        matched_indices = []
+        n_tracks, n_detections = similarity_matrix.shape
+        unmatched_tracks = set(range(n_tracks))
+        unmatched_detections = set(range(n_detections))
+
+        if n_tracks > 0 and n_detections > 0:
+            row_indices, col_indices = linear_sum_assignment(
+                similarity_matrix, maximize=True
+            )
+            for row, col in zip(row_indices, col_indices):
+                if similarity_matrix[row, col] >= min_similarity_thresh:
+                    matched_indices.append((row, col))
+                    unmatched_tracks.remove(row)
+                    unmatched_detections.remove(col)
+
+        return matched_indices, unmatched_tracks, unmatched_detections
+
+    def _spawn_new_trackers(
+        self,
+        detections: sv.Detections,
+        detection_boxes: np.ndarray,
+        unmatched_detections: set[int],
+        updated_detections: list[sv.Detections],
+    ):
+        """
+        Create new trackers for unmatched detections and
+            append detections to updated_detections detections.
+
+        Args:
+            detections: Current detections.
+            detection_boxes: Bounding boxes for detections.
+            unmatched_detections: Indices of unmatched detections.
+            updated_detections: List with all the detections
+
+        """
+        for detection_idx in unmatched_detections:
+            # Check for detections.confidence existence and index bounds
+            if detections.confidence is not None and detection_idx < len(
+                detections.confidence
+            ):
+                # Assign to a temporary variable with explicit type hint
+                confidence_score: float = float(detections.confidence[detection_idx])
+
+                # Use the temporary variable in the comparison
+                if confidence_score >= self.track_activation_threshold:
+                    # Original logic for high confidence detection
+
+                    new_tracker = BoTSORTKalmanBoxTracker(
+                        bbox=detection_boxes[detection_idx]
+                    )
+                    self.tracks.append(new_tracker)
+
+                    new_det = deepcopy(detections[detection_idx : detection_idx + 1])
+                    new_det = cast(sv.Detections, new_det)  # Cast added previously
+                    new_det.tracker_id = np.array([-1])
+                    updated_detections.append(new_det)
+            else:
+                pass  # Do nothing, the detection remains unmatched
+
+    def _similarity_step(
+        self,
+        detections: sv.Detections,
+        tracks: list[BoTSORTKalmanBoxTracker],
+    ) -> tuple[list[tuple[int, int]], set[int], set[int]]:
+        """Measures similarity based on IoU between tracks and detections and returns the matches
+            and unmatched tracks/detections. Is used for step 1 and 2 of the BYTE algorithm.
+
+        Args:
+            detections: The set of object detections.
+            tracks: The list of tracks that will be matched to the detections.
+
+        Returns:
+            A tuple containing:
+                - matched_indices: A list of (tracker_idx, detection_idx) pairs.
+                - unmatched_tracks_indices: A set of indices for tracks that
+                  were not matched.
+                - unmatched_detections_indices: A set of indices for detections
+                  that were not matched.
+        """  # noqa: E501
+        # Build IoU cost matrix between detections and predicted bounding boxes
+        similarity_matrix = get_iou_matrix(tracks, detections.xyxy)
+        thresh = self.minimum_iou_threshold
+
+        # Associate detections to tracks based on the higher value of the
+        # similarity matrix, using the Jonker-Volgenant algorithm (linear_sum_assignment). # noqa: E501
+        matched_indices, unmatched_tracks, unmatched_detections = (
+            self._get_associated_indices(similarity_matrix, thresh)
+        )
+        return matched_indices, unmatched_tracks, unmatched_detections
+
+    def reset(self) -> None:
+        """Reset tracker state by clearing all tracks and resetting ID counter.
+        Call this method when switching to a new video or scene.
+        """
+        self.tracks = []
+        BoTSORTKalmanBoxTracker.count_id = 0
+        if self.cmc is not None:
+            self.cmc.reset()

From de8a7b52cc6b97b2b8667df9adb61acfc35ff678 Mon Sep 17 00:00:00 2001
From: Tomasz Stanczyk <stanior666@gmail.com>
Date: Wed, 4 Mar 2026 10:07:51 +0100
Subject: [PATCH 02/14] Add sparse optical flow cmc to BoT-SORT. Update code
 documentation.

---
 trackers/core/botsort/cmc.py     | 333 ++++++++++++++++++++++++++++---
 trackers/core/botsort/tracker.py | 101 +++++++++-
 trackers/core/botsort/utils.py   | 153 ++++++++++++++
 3 files changed, 561 insertions(+), 26 deletions(-)
 create mode 100644 trackers/core/botsort/utils.py

diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
index 838eb356..4fec6eec 100644
--- a/trackers/core/botsort/cmc.py
+++ b/trackers/core/botsort/cmc.py
@@ -7,60 +7,244 @@
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Optional
+from typing import Optional, Literal
 
 import copy
 import numpy as np
 import cv2
 
+CMCTMethod = Literal["orb", "sparseOptFlow"]
 
 @dataclass
 class CMCConfig:
+    """
+    Configuration for camera motion compensation (CMC).
+
+    The CMC module estimates a global 2D affine transform `H` (2x3) between consecutive frames.
+    This transform is then applied to predicted track states before data association.
+
+    Attributes:
+        method:
+            Camera motion estimation method.
+
+            - "orb": Feature matching using FAST keypoints + ORB descriptors + BFMatcher,
+              followed by robust affine estimation (RANSAC).
+              Optionally masks out detection boxes so features are extracted from background.
+            - "sparseOptFlow": Sparse optical flow using corner tracking:
+              goodFeaturesToTrack -> calcOpticalFlowPyrLK -> robust affine estimation (RANSAC).
+
+        downscale:
+            Integer downscale factor applied to frames before running CMC.
+
+            Purpose:
+            - Speeds up feature extraction / optical flow.
+
+            Behavior:
+            - Frames are resized to (W//downscale, H//downscale) for motion estimation.
+            - The resulting affine translation components H[0,2], H[1,2] are scaled back
+              by multiplying by `downscale`, so the transform is in original image coordinates.
+
+        fast_threshold:
+            (ORB only) Threshold for the FAST keypoint detector.
+            Higher values yield fewer keypoints (more selective); lower values yield more keypoints.
+
+        ransac_reproj_threshold:
+            (ORB only) RANSAC reprojection threshold in pixels passed to
+            OpenCV’s affine estimation. It controls how far a point is allowed to deviate from the
+            estimated model while still being counted as an inlier.
+            Smaller values are stricter (reject more matches); larger values are more tolerant.
+
+        max_spatial_distance_frac:
+            (ORB only) Maximum allowed spatial displacement for a tentative match, expressed as a
+            fraction of (image width, image height) *after downscale*.
+
+            Example:
+                If max_spatial_distance_frac = 0.25 and the downscaled frame is (W, H),
+                then a match is rejected if |dx| >= 0.25*W or |dy| >= 0.25*H.
+
+            Motivation:
+                Reject obviously incorrect descriptor matches whose displacement is implausibly large.
+
+        roi_min_frac:
+            (ORB only) Lower bound of the region-of-interest (ROI) used to select keypoints,
+            expressed as a fraction of frame size. Points outside the ROI are masked out.
+
+            Example:
+                roi_min_frac=0.02 means we ignore a ~2% border on each side.
+
+        roi_max_frac:
+            (ORB only) Upper bound of the ROI used to select keypoints (fraction of frame size).
+            Together with roi_min_frac, it defines a central rectangle:
+                [roi_min_frac..roi_max_frac] in both x and y.
+
+        sof_max_corners:
+            (SparseOptFlow only) `maxCorners` passed to `cv2.goodFeaturesToTrack`.
+            Maximum number of corners to detect for tracking.
+            Larger values can improve robustness (more points), but cost more compute.
+
+        sof_quality_level:
+            (SparseOptFlow only) `qualityLevel` passed to `cv2.goodFeaturesToTrack`.
+            Minimum accepted quality of corners. A higher value keeps only stronger corners;
+            a lower value yields more corners (including weaker ones).
+
+        sof_min_distance:
+            (SparseOptFlow only) `minDistance` passed to `cv2.goodFeaturesToTrack`.
+            Minimum Euclidean distance (in pixels) between returned corners.
+            Higher values produce more spatially spread points; lower values allow clustering.
+
+        sof_block_size:
+            (SparseOptFlow only) `blockSize` passed to `cv2.goodFeaturesToTrack`.
+            Size of the neighborhood used to compute corner quality (structure tensor window).
+
+        sof_use_harris:
+            (SparseOptFlow only) `useHarrisDetector` passed to `cv2.goodFeaturesToTrack`.
+            If True, uses the Harris corner measure; if False, uses the Shi-Tomasi measure.
+
+        sof_k:
+            (SparseOptFlow only) `k` passed to `cv2.goodFeaturesToTrack`.
+            Harris detector free parameter. Ignored if `sof_use_harris` is False.
+    """
+    method: CMCTMethod = "orb"
     downscale: int = 2
-    fast_threshold: int = 20
 
-    # Affine estimation
+    # ORB parameters
+    fast_threshold: int = 20
     ransac_reproj_threshold: float = 3.0
-
-    # Filtering matches by spatial displacement (fraction of image size)
     max_spatial_distance_frac: float = 0.25
-
-    # Keep features from central ROI (avoid borders)
     roi_min_frac: float = 0.02
     roi_max_frac: float = 0.98
 
+    # Sparse optical flow parameters (goodFeaturesToTrack)
+    sof_max_corners: int = 1000
+    sof_quality_level: float = 0.01
+    sof_min_distance: int = 1
+    sof_block_size: int = 3
+    sof_use_harris: bool = False
+    sof_k: float = 0.04
+
 
 class CMC:
+    """
+    Camera motion compensation estimator and track state warper.
+
+    Typical usage in the tracker loop:
+        H = cmc.estimate(frame_bgr, mask_boxes_xyxy)
+        CMC.apply_to_tracks(tracks, H)
+
+    Internal state:
+        - Keeps previous-frame features / points depending on the chosen method.
+        - On the first frame (or after reset), returns identity transform.
+
+    Notes:
+        - H maps points from previous frame coordinates to current frame coordinates.
+        - This class does not perform any drawing/visualization; it only estimates transforms.
+    """
+
     def __init__(self, cfg: Optional[CMCConfig] = None) -> None:
+        """
+        Initialize CMC.
+
+        Args:
+            cfg: Optional configuration. If None, defaults are used.
+
+        Notes:
+            - ORB detector/extractor/matcher are only created if method == "orb".
+            - Sparse optical flow parameters are always initialized (cheap).
+        """
         self.cfg = cfg or CMCConfig()
         self.downscale = max(1, int(self.cfg.downscale))
 
-        self.detector = cv2.FastFeatureDetector_create(self.cfg.fast_threshold)
-        self.extractor = cv2.ORB_create()
-        self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
+        # ORB init (only if needed)
+        self.detector = None
+        self.extractor = None
+        self.matcher = None
+        if self.cfg.method == "orb":
+            self.detector = cv2.FastFeatureDetector_create(self.cfg.fast_threshold)
+            self.extractor = cv2.ORB_create()
+            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
+
+        # SparseOptFlow params
+        self.feature_params = dict(
+            maxCorners=self.cfg.sof_max_corners,
+            qualityLevel=self.cfg.sof_quality_level,
+            minDistance=self.cfg.sof_min_distance,
+            blockSize=self.cfg.sof_block_size,
+            useHarrisDetector=self.cfg.sof_use_harris,
+            k=self.cfg.sof_k,
+        )
+
+        self.reset()
 
+    def reset(self) -> None:
+        """
+        Reset internal state.
+
+        After calling reset:
+        - The next `estimate()` call returns identity and initializes prev-frame state.
+        - This should be called when starting a new sequence or after a scene cut.
+        """
         self._initialized = False
+
+        # ORB state
         self._prev_kps = None
         self._prev_desc: Optional[np.ndarray] = None
 
-    def reset(self) -> None:
-        self._initialized = False
-        self._prev_kps = None
-        self._prev_desc = None
+        # SparseOptFlow state
+        self._prev_frame_gray: Optional[np.ndarray] = None
+        self._prev_points: Optional[np.ndarray] = None  # shape (N,1,2) from goodFeaturesToTrack
 
     def estimate(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
+        """
+        Estimate global affine transform H (2x3) from previous frame to current frame.
+
+        Args:
+            frame_bgr: Current frame in BGR format (uint8), shape (H, W, 3).
+            dets_xyxy: Optional detections (N,4) in xyxy format, in original image scale.
+                Used only by ORB method for masking out object regions (background-only features).
+
+        Returns:
+            H: Affine transform matrix of shape (2, 3), dtype float32.
+               Identity if not enough correspondences or if not initialized yet.
+        """
         if frame_bgr is None:
             return np.eye(2, 3, dtype=np.float32)
 
+        if self.cfg.method == "orb":
+            return self._estimate_orb(frame_bgr, dets_xyxy)
+
+        if self.cfg.method == "sparseOptFlow":
+            return self._estimate_sparse_optflow(frame_bgr)
+
+        # fallback
+        return np.eye(2, 3, dtype=np.float32)
+
+    def _estimate_orb(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
+        """
+        ORB-based affine estimation.
+
+        Steps:
+            1) Convert to grayscale (+ optional downscale).
+            2) Create ROI mask and optionally mask out detections (background emphasis).
+            3) Detect FAST keypoints and compute ORB descriptors.
+            4) KNN match descriptors against previous frame (ratio test).
+            5) Filter matches by max spatial displacement and by 2.5*std inliers.
+            6) Estimate affine transform with RANSAC.
+            7) Scale translation back up if downscaled.
+
+        Args:
+            frame_bgr: Current BGR frame.
+            dets_xyxy: Optional detection boxes for masking (original image scale).
+
+        Returns:
+            H: (2,3) affine transform mapping previous-current, float32.
+        """
         H_img, W_img = frame_bgr.shape[:2]
         gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
 
-        # Downscale for speed / robustness
         if self.downscale > 1:
             gray = cv2.resize(gray, (W_img // self.downscale, H_img // self.downscale))
         H, W = gray.shape[:2]
 
-        # Build mask: central ROI + remove detections (background features)
         mask = np.zeros_like(gray, dtype=np.uint8)
         y0 = int(self.cfg.roi_min_frac * H)
         y1 = int(self.cfg.roi_max_frac * H)
@@ -71,10 +255,13 @@ def estimate(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None
         if dets_xyxy is not None and len(dets_xyxy) > 0:
             dets = np.asarray(dets_xyxy, dtype=np.float32) / float(self.downscale)
             dets = dets.astype(np.int32)
+
+            # Safety clipping
             dets[:, 0] = np.clip(dets[:, 0], 0, W - 1)
             dets[:, 2] = np.clip(dets[:, 2], 0, W - 1)
             dets[:, 1] = np.clip(dets[:, 1], 0, H - 1)
             dets[:, 3] = np.clip(dets[:, 3], 0, H - 1)
+
             for x1b, y1b, x2b, y2b in dets:
                 if x2b > x1b and y2b > y1b:
                     mask[y1b:y2b, x1b:x2b] = 0
@@ -85,20 +272,17 @@ def estimate(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None
 
         H_aff = np.eye(2, 3, dtype=np.float32)
 
-        # First frame: only initialize
         if not self._initialized:
             self._prev_kps = copy.copy(kps)
             self._prev_desc = None if desc is None else copy.copy(desc)
             self._initialized = True
             return H_aff
 
-        # If missing descriptors
         if self._prev_desc is None or desc is None or len(desc) == 0:
             self._prev_kps = copy.copy(kps)
             self._prev_desc = None if desc is None else copy.copy(desc)
             return H_aff
 
-        # KNN match (k=2) + ratio test
         knn = self.matcher.knnMatch(self._prev_desc, desc, k=2)
         if len(knn) == 0:
             self._prev_kps = copy.copy(kps)
@@ -148,14 +332,119 @@ def estimate(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None
                         H_aff[0, 2] *= self.downscale
                         H_aff[1, 2] *= self.downscale
 
-        # Update prev
         self._prev_kps = copy.copy(kps)
         self._prev_desc = copy.copy(desc)
+        return H_aff
+
+    def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
+        """
+        Sparse optical-flow-based affine estimation.
+
+        Steps:
+            1) grayscale (+ optional downscale)
+            2) detect corners using goodFeaturesToTrack
+            3) compute correspondences via calcOpticalFlowPyrLK(prev, curr, prev_points)
+            4) keep only points with status == 1
+            5) estimate affine transform with RANSAC
+            6) scale translation back up if downscaled
+
+        Corners detected in the current frame are not used for the current
+        estimate; they are stored and tracked into the next frame.
+
+        Args:
+            frame_bgr: Current BGR frame.
+
+        Returns:
+            H: (2,3) affine transform mapping previous -> current, float32.
+            Identity is returned on the first call after `reset()`, when corners
+            are missing for either frame, when optical flow yields no result, when
+            4 or fewer points were tracked, or when the affine fit returns None.
+
+        Warns:
+            RuntimeWarning: If 4 or fewer point pairs survive the status filter.
+        """
+        H_img, W_img = frame_bgr.shape[:2]
+        frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
+        if self.downscale > 1:
+            frame = cv2.resize(frame, (W_img // self.downscale, H_img // self.downscale))
+
+        H_aff = np.eye(2, 3, dtype=np.float32)
+
+        # Corners of the current frame; may be None (handled below).
+        keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
+
+        # Take the previous-frame state into locals, then store the current
+        # frame right away: every exit path below has to leave the same state
+        # behind for the next call.
+        was_initialized = self._initialized
+        prev_gray, prev_points = self._prev_frame_gray, self._prev_points
+        self._prev_frame_gray = frame.copy()
+        self._prev_points = None if keypoints is None else keypoints.copy()
+        self._initialized = True
+
+        # First frame, or no stored corners from the previous frame.
+        if not was_initialized or prev_gray is None or prev_points is None:
+            return H_aff
+        # Kept from the original flow: without current-frame corners the
+        # estimate is skipped, although only the previous corners are tracked.
+        if keypoints is None:
+            return H_aff
+
+        # Track the previous corners into the current frame.
+        matched, status, _err = cv2.calcOpticalFlowPyrLK(
+            prev_gray, frame, prev_points, None
+        )
+        if status is None or matched is None:
+            return H_aff
+
+        # Keep only correspondences flagged as found.
+        # status is (N,1) or (N,)
+        found = status.reshape(-1).astype(bool)
+        prev_pts = prev_points[found]
+        curr_pts = matched[found]
+
+        # Same threshold as before: more than 4 point pairs are required.
+        if len(prev_pts) <= 4:
+            import warnings  # local: not imported at module level
+
+            warnings.warn("CMC: not enough matching points", RuntimeWarning)
+            return H_aff
+
+        # Partial affine (4 degrees of freedom) fit with RANSAC.
+        # H_est is None when no model could be estimated; identity is kept then.
+        H_est, _ = cv2.estimateAffinePartial2D(prev_pts, curr_pts, cv2.RANSAC)
+        if H_est is not None:
+            H_aff = H_est.astype(np.float32)
+
+            # Handle downscale translation back to original image coords
+            if self.downscale > 1:
+                H_aff[0, 2] *= self.downscale
+                H_aff[1, 2] *= self.downscale
 
         return H_aff
 
     @staticmethod
     def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
+        """
+        Apply affine transform H (2x3) to tracker states and covariances in-place.
+
+        This implementation assumes each track has:
+            - `state`: (8,1) float vector [x1,y1,x2,y2,vx,vy,vx2,vy2]^T
+            - `P`: (8,8) covariance matrix
+
+        The transform is applied as:
+            state := A * state + translation
+            P     := A * P * A^T
+
+        Where A applies the 2x2 rotation/shear block to each 2D component block in the state.
+
+        Args:
+            tracks: List of track objects with `.state` and `.P` attributes.
+            H: Affine transform (2,3) mapping prev -> curr.
+
+        Returns:
+            None. Tracks are modified in-place.
+        """
         if H is None or len(tracks) == 0:
             return
 
@@ -163,12 +452,10 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
         R = H[:2, :2]
         t = H[:2, 2:3]  # (2,1)
 
-        # A4 maps [x1,y1,x2,y2]
         A4 = np.zeros((4, 4), dtype=np.float32)
         A4[0:2, 0:2] = R
         A4[2:4, 2:4] = R
 
-        # A8 maps state (pos and vel blocks)
         A8 = np.zeros((8, 8), dtype=np.float32)
         A8[0:4, 0:4] = A4
         A8[4:8, 4:8] = A4
@@ -178,4 +465,4 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
         for trk in tracks:
             trk.state = (A8 @ trk.state).astype(np.float32)
             trk.state[0:4] += trans4
-            trk.P = (A8 @ trk.P @ A8.T).astype(np.float32)
+            trk.P = (A8 @ trk.P @ A8.T).astype(np.float32)
\ No newline at end of file
diff --git a/trackers/core/botsort/tracker.py b/trackers/core/botsort/tracker.py
index c9ebcdba..c49808a2 100644
--- a/trackers/core/botsort/tracker.py
+++ b/trackers/core/botsort/tracker.py
@@ -13,13 +13,40 @@
 
 from trackers.core.base import BaseTracker
 from trackers.core.botsort.kalman_box_tracker import BoTSORTKalmanBoxTracker
-from trackers.utils.sort_utils import (
+from trackers.core.botsort.utils import (
     get_alive_trackers,
     get_iou_matrix,
 )
 from trackers.core.botsort.cmc import CMC, CMCConfig
 
 class BoTSORTTracker(BaseTracker):
+    """
+    BoT-SORT-style multi-object tracker (IoU association + optional CMC).
+
+    The tracker maintains a list of active tracks (Kalman-filter-based) and, for each frame,
+    performs:
+      1) Predict existing track states (Kalman predict)
+      2) Split detections into high/low confidence groups
+      3) Apply camera motion compensation to predicted tracks
+      4) Associate high-confidence detections to tracks (IoU + assignment)
+      5) Associate low-confidence detections to remaining tracks
+      6) Spawn new tracks from unmatched high-confidence detections
+      7) Remove tracks that have been lost for too long
+
+    Parameters in __init__ control thresholds and lifecycle logic similarly to ByteTrack/BoT-SORT.
+
+    Attributes:
+        tracks: List of active `BoTSORTKalmanBoxTracker` objects.
+        maximum_frames_without_update: Max number of consecutive frames a track can go unmatched
+            before being removed.
+        minimum_consecutive_frames: Track maturity threshold before assigning a permanent ID.
+        minimum_iou_threshold: Minimum IoU required for a valid match.
+        track_activation_threshold: Confidence threshold for spawning a new track.
+        high_conf_det_threshold: Confidence threshold splitting detections into high/low groups.
+        enable_cmc: Whether to run camera motion compensation each frame (if `cmc` is set).
+        cmc: Camera motion compensation instance (or None if disabled).
+    """
+
     def __init__(
         self,
         lost_track_buffer: int = 30,
@@ -28,9 +55,36 @@ def __init__(
         minimum_consecutive_frames: int = 2,
         minimum_iou_threshold: float = 0.1,
         high_conf_det_threshold: float = 0.6,
-        enable_cmc: bool = True
+        enable_cmc: bool = True,
+        # cmc_method: str = "orb",
+        cmc_method: str = "sparseOptFlow",
+        cmc_downscale: int = 2,
 
     ) -> None:
+        """
+        Initialize the tracker.
+
+        Args:
+            lost_track_buffer: Time buffer (in frames at 30 FPS) for keeping lost tracks alive
+                before deletion. This is scaled by `frame_rate`.
+            frame_rate: Video frame rate used to scale the lost track buffer to time-like behavior.
+            track_activation_threshold: Minimum detection confidence to spawn a new track.
+            minimum_consecutive_frames: Number of successful updates required before assigning
+                a stable track ID (different than initial -1).
+            minimum_iou_threshold: Minimum IoU to accept a detection-track association.
+            high_conf_det_threshold: Confidence threshold used to split detections into:
+                - high confidence: confidence >= threshold
+                - low confidence:  confidence < threshold
+            enable_cmc: Whether to enable camera motion compensation (CMC).
+            cmc_method: CMC method string passed into `CMCConfig(method=...)`. Supported values
+                depend on `CMC` (e.g. "orb", "sparseOptFlow"). See CMCConfig.
+            cmc_downscale: Downscale factor used inside CMC for speed/robustness.
+
+        Notes:
+            - `maximum_frames_without_update` is computed as:
+                int(frame_rate / 30.0 * lost_track_buffer)
+              to maintain consistent “seconds” worth of buffer across different FPS.
+        """
         # Calculate maximum frames without update based on lost_track_buffer and
         # frame_rate. This scales the buffer based on the frame rate to ensure
         # consistent time-based tracking across different frame rates.
@@ -42,7 +96,7 @@ def __init__(
         self.tracks: list[BoTSORTKalmanBoxTracker] = []
 
         self.enable_cmc = enable_cmc
-        self.cmc = CMC(CMCConfig()) if enable_cmc else None
+        self.cmc = CMC(CMCConfig(method=cmc_method, downscale=cmc_downscale)) if enable_cmc else None
 
     def _update_detections(
         self,
@@ -51,6 +105,26 @@ def _update_detections(
         updated_detections: list[sv.Detections],
         matched_indices: list[tuple[int, int]],
     ) -> list[sv.Detections]:
+        """
+        Apply matched detection updates to tracks and append corresponding outputs.
+
+        For each (track_idx, det_idx) match:
+        - Update the track’s Kalman state with the detection bbox.
+        - If the track is “mature” (>= minimum_consecutive_frames) and still has tracker_id == -1,
+          assign a new unique tracker ID.
+        - Create a single-row `sv.Detections` object for the matched detection and set its
+          tracker_id to the track ID (or -1 if not mature yet).
+        - Append it to `updated_detections`.
+
+        Args:
+            tracks: Tracks being updated.
+            detections: Detections used for update.
+            updated_detections: Accumulator list of per-detection outputs for this frame.
+            matched_indices: List of (track_row_index, detection_col_index) pairs.
+
+        Returns:
+            The same `updated_detections` list, returned for convenience.
+        """
         # Update matched tracks with assigned detections.
         det_bboxes = detections.xyxy
         for row, col in matched_indices:
@@ -75,6 +149,27 @@ def update(
         detections: sv.Detections,
         frame: np.ndarray,
     ) -> sv.Detections:
+        """
+        Update the tracker with detections from the current frame.
+
+        This is the main per-frame entry point.
+
+        Args:
+            detections: Supervision detections for the current frame. Must include `.xyxy`.
+                Confidence (`detections.confidence`) is optional but recommended.
+                The method writes/overwrites `detections.tracker_id`.
+            frame: Current video frame in BGR format (H, W, 3), required if CMC is enabled.
+
+        Returns:
+            A merged `sv.Detections` object containing detections from this frame with
+            `tracker_id` assigned:
+              - >= 0 indicates a confirmed track ID
+              - -1 indicates unconfirmed/untracked (e.g., new / low confidence / not yet mature)
+
+        Notes:
+            - If CMC is enabled, the tracker estimates a global affine transform (2x3) from the
+              frame and uses it to warp predicted track states before association.
+        """
         if len(self.tracks) == 0 and len(detections) == 0:
             detections.tracker_id = np.array([], dtype=int)
             return detections
diff --git a/trackers/core/botsort/utils.py b/trackers/core/botsort/utils.py
new file mode 100644
index 00000000..078542e4
--- /dev/null
+++ b/trackers/core/botsort/utils.py
@@ -0,0 +1,153 @@
+# ------------------------------------------------------------------------
+# Trackers
+# Copyright (c) 2026 Roboflow. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------
+
+from collections.abc import Sequence
+from copy import deepcopy
+from typing import TypeVar
+
+import numpy as np
+import supervision as sv
+
+from trackers.core.botsort.kalman_box_tracker import BoTSORTKalmanBoxTracker
+
+# Type variable bound to the BoT-SORT Kalman track class, so the helpers
+# below return the same tracker type they were given.
+KalmanBoxTrackerType = TypeVar(
+    "KalmanBoxTrackerType", bound=BoTSORTKalmanBoxTracker
+)
+
+
+def get_alive_trackers(
+    trackers: Sequence[KalmanBoxTrackerType],
+    minimum_consecutive_frames: int,
+    maximum_frames_without_update: int,
+) -> list[KalmanBoxTrackerType]:
+    """
+    Return the trackers that should be kept after the current frame.
+
+    A tracker survives when it has gone fewer than
+    `maximum_frames_without_update` frames without an update AND it is
+    either mature or was just updated (`time_since_update == 0`).
+
+    Args:
+        trackers: Tracker objects to filter.
+        minimum_consecutive_frames: Updates needed for a track to be mature.
+        maximum_frames_without_update: Frame count at which a track is dead.
+
+    Returns:
+        New list with the surviving trackers, in input order.
+    """
+    return [
+        trk
+        for trk in trackers
+        if trk.time_since_update < maximum_frames_without_update
+        and (
+            trk.number_of_successful_updates >= minimum_consecutive_frames
+            or trk.time_since_update == 0
+        )
+    ]
+
+
+def get_iou_matrix(
+    trackers: Sequence[KalmanBoxTrackerType], detection_boxes: np.ndarray
+) -> np.ndarray:
+    """
+    Build the IoU similarity matrix between predicted track boxes and detections.
+
+    Args:
+        trackers: Tracker objects exposing `get_state_bbox()`.
+        detection_boxes: Detected bounding boxes in the
+            form [x1, y1, x2, y2].
+
+    Returns:
+        Matrix of shape (len(trackers), len(detection_boxes)) holding the IoU
+        of every (track, detection) pair. When either side is empty, a
+        float32 zero matrix of that shape is returned instead.
+    """
+    num_tracks, num_dets = len(trackers), len(detection_boxes)
+    if num_tracks == 0 or num_dets == 0:
+        # Nothing to compare: return an all-zero matrix that still has the
+        # (tracks, detections) shape.
+        return np.zeros((num_tracks, num_dets), dtype=np.float32)
+
+    # One predicted box per tracker, stacked into a single array.
+    predicted_boxes = np.array([t.get_state_bbox() for t in trackers])
+    return sv.box_iou_batch(predicted_boxes, detection_boxes)
+
+
+def update_detections_with_track_ids(
+    trackers: Sequence[KalmanBoxTrackerType],
+    detections: sv.Detections,
+    detection_boxes: np.ndarray,
+    minimum_iou_threshold: float,
+    minimum_consecutive_frames: int,
+) -> sv.Detections:
+    """
+    Return a copy of `detections` with `tracker_id` filled in from `trackers`.
+
+    Track boxes and detections are re-associated greedily: all pairs whose IoU
+    is strictly above `minimum_iou_threshold` are visited from highest to
+    lowest IoU, and each track and each detection is used at most once.
+
+    A matched detection receives the track's ID only when the track is mature
+    (`number_of_successful_updates >= minimum_consecutive_frames`); a mature
+    track that still has ID -1 is first given a fresh ID, which mutates the
+    tracker object. Every other detection keeps tracker ID -1.
+
+    Args:
+        trackers: BoTSORTKalmanBoxTracker objects to take IDs from.
+        detections: The latest set of object detections.
+        detection_boxes: Detected bounding boxes in the
+            form [x1, y1, x2, y2].
+        minimum_iou_threshold: IoU a pair must exceed to be considered
+            a match.
+        minimum_consecutive_frames: Number of successful updates a track
+            needs before its ID is exposed.
+
+    Returns:
+        A deep copy of `detections` whose `tracker_id` is an integer array
+        with one entry per row of `detection_boxes`.
+    """
+    num_tracks, num_dets = len(trackers), len(detection_boxes)
+    final_tracker_ids = [-1] * num_dets
+
+    # Without tracks or detections there is nothing to associate; the empty
+    # zero matrix yields no candidate pairs below.
+    iou_matrix_final = np.zeros((num_tracks, num_dets), dtype=np.float32)
+    if num_tracks > 0 and num_dets > 0:
+        predicted_boxes = np.array([t.get_state_bbox() for t in trackers])
+        iou_matrix_final = sv.box_iou_batch(predicted_boxes, detection_boxes)
+
+    row_indices, col_indices = np.where(iou_matrix_final > minimum_iou_threshold)
+    # Plain ints so set membership and list indexing do not depend on numpy
+    # scalar types.
+    candidate_pairs = sorted(
+        zip(row_indices.tolist(), col_indices.tolist()),
+        key=lambda pair: iou_matrix_final[pair[0], pair[1]],
+        reverse=True,
+    )
+
+    used_rows: set[int] = set()
+    used_cols: set[int] = set()
+    for row, col in candidate_pairs:
+        if row in used_rows or col in used_cols:
+            continue
+        # The pair is consumed even when the track is immature, so a weaker
+        # pair cannot claim the same track or detection afterwards.
+        used_rows.add(row)
+        used_cols.add(col)
+
+        tracker_obj = trackers[row]
+        if tracker_obj.number_of_successful_updates < minimum_consecutive_frames:
+            continue
+        if tracker_obj.tracker_id == -1:
+            tracker_obj.tracker_id = BoTSORTKalmanBoxTracker.get_next_tracker_id()
+        final_tracker_ids[col] = tracker_obj.tracker_id
+
+    updated_detections = deepcopy(detections)
+    # Explicit dtype: an empty list would otherwise produce a float64 array.
+    updated_detections.tracker_id = np.array(final_tracker_ids, dtype=int)
+    return updated_detections

From 13db13687603a5f5bcf55f149b6cd39e08b7a423 Mon Sep 17 00:00:00 2001
From: Tomasz Stanczyk <stanior666@gmail.com>
Date: Wed, 4 Mar 2026 17:05:47 +0100
Subject: [PATCH 03/14] Add SIFT cmc to BoT-SORT. Refactor code. Update code
 documentation.

---
 trackers/core/botsort/cmc.py     | 180 +++++++++++++++++++++----------
 trackers/core/botsort/tracker.py |  90 +++++++++-------
 2 files changed, 174 insertions(+), 96 deletions(-)

diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
index 4fec6eec..5db0cb74 100644
--- a/trackers/core/botsort/cmc.py
+++ b/trackers/core/botsort/cmc.py
@@ -4,8 +4,6 @@
 # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 # ------------------------------------------------------------------------
 
-from __future__ import annotations
-
 from dataclasses import dataclass
 from typing import Optional, Literal
 
@@ -13,25 +11,35 @@
 import numpy as np
 import cv2
 
-CMCTMethod = Literal["orb", "sparseOptFlow"]
+CMCTMethod = Literal["orb", "sift", "sparseOptFlow"]
 
 @dataclass
 class CMCConfig:
     """
     Configuration for camera motion compensation (CMC).
 
-    The CMC module estimates a global 2D affine transform `H` (2x3) between consecutive frames.
-    This transform is then applied to predicted track states before data association.
+    The CMC module estimates a global 2D affine transform `H` (2x3) between consecutive 
+    frames. This transform is then applied to predicted track states before data 
+    association.
 
     Attributes:
         method:
             Camera motion estimation method.
 
-            - "orb": Feature matching using FAST keypoints + ORB descriptors + BFMatcher,
+            - "orb": Feature matching using 
+              FAST keypoints + ORB descriptors + BFMatcher (Hamming),
               followed by robust affine estimation (RANSAC).
-              Optionally masks out detection boxes so features are extracted from background.
+              Optionally masks out detection boxes so features are extracted from
+              background.
+            - "sift": Feature matching using 
+              SIFT keypoints + SIFT descriptors + BFMatcher (L2), 
+              followed by robust affine estimation (RANSAC). 
+              Optionally masks out detection boxes so features are extracted from
+              background. "sift" generally produces fewer but more distinctive matches 
+              than ORB at higher compute cost.
             - "sparseOptFlow": Sparse optical flow using corner tracking:
-              goodFeaturesToTrack -> calcOpticalFlowPyrLK -> robust affine estimation (RANSAC).
+              goodFeaturesToTrack -> calcOpticalFlowPyrLK -> robust affine estimation 
+              (RANSAC).
 
         downscale:
             Integer downscale factor applied to frames before running CMC.
@@ -42,41 +50,61 @@ class CMCConfig:
             Behavior:
             - Frames are resized to (W//downscale, H//downscale) for motion estimation.
             - The resulting affine translation components H[0,2], H[1,2] are scaled back
-              by multiplying by `downscale`, so the transform is in original image coordinates.
+              by multiplying by `downscale`, so the transform is in original image 
+              coordinates.
 
         fast_threshold:
             (ORB only) Threshold for the FAST keypoint detector.
-            Higher values yield fewer keypoints (more selective); lower values yield more keypoints.
+            Higher values yield fewer keypoints (more selective); lower values yield 
+            more keypoints.
 
         ransac_reproj_threshold:
             (ORB only) RANSAC reprojection threshold in pixels passed to
-            OpenCV’s affine estimation. It controls how far a point is allowed to deviate from the
-            estimated model while still being counted as an inlier.
-            Smaller values are stricter (reject more matches); larger values are more tolerant.
+            OpenCV’s affine estimation. It controls how far a point is allowed to 
+            deviate from the estimated model while still being counted as an inlier.
+            Smaller values are stricter (reject more matches); larger values are more 
+            tolerant.
 
         max_spatial_distance_frac:
-            (ORB only) Maximum allowed spatial displacement for a tentative match, expressed as a
-            fraction of (image width, image height) *after downscale*.
+            (ORB/SIFT) Maximum allowed spatial displacement for a tentative match,
+            expressed as a fraction of (image width, image height) *after downscale*.
 
             Example:
                 If max_spatial_distance_frac = 0.25 and the downscaled frame is (W, H),
                 then a match is rejected if |dx| >= 0.25*W or |dy| >= 0.25*H.
 
             Motivation:
-                Reject obviously incorrect descriptor matches whose displacement is implausibly large.
+                Reject obviously incorrect descriptor matches whose displacement is 
+                implausibly large.
 
         roi_min_frac:
-            (ORB only) Lower bound of the region-of-interest (ROI) used to select keypoints,
-            expressed as a fraction of frame size. Points outside the ROI are masked out.
+            (ORB/SIFT) Lower bound of the region-of-interest (ROI) used to select
+            keypoints, expressed as a fraction of frame size. Points outside the ROI
+            are masked out.
 
             Example:
                 roi_min_frac=0.02 means we ignore a ~2% border on each side.
 
         roi_max_frac:
-            (ORB only) Upper bound of the ROI used to select keypoints (fraction of frame size).
-            Together with roi_min_frac, it defines a central rectangle:
+            (ORB/SIFT) Upper bound of the ROI used to select keypoints (fraction of
+            frame size). Together with roi_min_frac, it defines a central rectangle:
                 [roi_min_frac..roi_max_frac] in both x and y.
 
+        sift_n_octave_layers: 
+            (SIFT only) Number of octave layers used by SIFT when constructing the 
+            scale-space pyramid. Increasing this can increase sensitivity to scale 
+            changes, at higher compute cost.
+
+        sift_contrast_threshold: 
+            (SIFT only) Threshold controlling how sensitive SIFT is 
+            to low-contrast keypoints. Lower values generally produce more keypoints; 
+            higher values are stricter.
+
+        sift_edge_threshold: 
+            (SIFT only) Threshold controlling rejection of keypoints on edges. 
+            Lower values reject more edge-like responses; higher values are more 
+            permissive.
+            
         sof_max_corners:
             (SparseOptFlow only) `maxCorners` passed to `cv2.goodFeaturesToTrack`.
             Maximum number of corners to detect for tracking.
@@ -84,36 +112,46 @@ class CMCConfig:
 
         sof_quality_level:
             (SparseOptFlow only) `qualityLevel` passed to `cv2.goodFeaturesToTrack`.
-            Minimum accepted quality of corners. A higher value keeps only stronger corners;
-            a lower value yields more corners (including weaker ones).
+            Minimum accepted quality of corners. A higher value keeps only stronger 
+            corners; a lower value yields more corners (including weaker ones).
 
         sof_min_distance:
             (SparseOptFlow only) `minDistance` passed to `cv2.goodFeaturesToTrack`.
             Minimum Euclidean distance (in pixels) between returned corners.
-            Higher values produce more spatially spread points; lower values allow clustering.
+            Higher values produce more spatially spread points; lower values allow 
+            clustering.
 
         sof_block_size:
             (SparseOptFlow only) `blockSize` passed to `cv2.goodFeaturesToTrack`.
-            Size of the neighborhood used to compute corner quality (structure tensor window).
+            Size of the neighborhood used to compute corner quality (structure tensor 
+            window).
 
         sof_use_harris:
-            (SparseOptFlow only) `useHarrisDetector` passed to `cv2.goodFeaturesToTrack`.
-            If True, uses the Harris corner measure; if False, uses the Shi-Tomasi measure.
+            (SparseOptFlow only) `useHarrisDetector` passed to 
+            `cv2.goodFeaturesToTrack`. If True, uses the Harris corner measure; 
+            if False, uses the Shi-Tomasi measure.
 
         sof_k:
             (SparseOptFlow only) `k` passed to `cv2.goodFeaturesToTrack`.
             Harris detector free parameter. Ignored if `sof_use_harris` is False.
     """
-    method: CMCTMethod = "orb"
+    method: CMCTMethod = "sparseOptFlow"
     downscale: int = 2
 
-    # ORB parameters
-    fast_threshold: int = 20
+    # Shared ORB and SIFT parameters (_estimate_feature_affine)
     ransac_reproj_threshold: float = 3.0
     max_spatial_distance_frac: float = 0.25
     roi_min_frac: float = 0.02
     roi_max_frac: float = 0.98
 
+    # ORB parameters
+    fast_threshold: int = 20
+
+    # SIFT parameters
+    sift_n_octave_layers: int = 3
+    sift_contrast_threshold: float = 0.02
+    sift_edge_threshold: int = 20
+
     # Sparse optical flow parameters (goodFeaturesToTrack)
     sof_max_corners: int = 1000
     sof_quality_level: float = 0.01
@@ -137,7 +175,8 @@ class CMC:
 
     Notes:
         - H maps points from previous frame coordinates to current frame coordinates.
-        - This class does not perform any drawing/visualization; it only estimates transforms.
+        - This class does not perform any drawing/visualization; it only estimates
+          transforms.
     """
 
     def __init__(self, cfg: Optional[CMCConfig] = None) -> None:
@@ -148,8 +187,8 @@ def __init__(self, cfg: Optional[CMCConfig] = None) -> None:
             cfg: Optional configuration. If None, defaults are used.
 
         Notes:
-            - ORB detector/extractor/matcher are only created if method == "orb".
-            - Sparse optical flow parameters are always initialized (cheap).
+            - Detector/extractor/matcher are only created if method is "orb" or "sift".
+            - feature_params are only created if method is "sparseOptFlow".
         """
         self.cfg = cfg or CMCConfig()
         self.downscale = max(1, int(self.cfg.downscale))
@@ -162,16 +201,27 @@ def __init__(self, cfg: Optional[CMCConfig] = None) -> None:
             self.detector = cv2.FastFeatureDetector_create(self.cfg.fast_threshold)
             self.extractor = cv2.ORB_create()
             self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
-
-        # SparseOptFlow params
-        self.feature_params = dict(
-            maxCorners=self.cfg.sof_max_corners,
-            qualityLevel=self.cfg.sof_quality_level,
-            minDistance=self.cfg.sof_min_distance,
-            blockSize=self.cfg.sof_block_size,
-            useHarrisDetector=self.cfg.sof_use_harris,
-            k=self.cfg.sof_k,
-        )
+        elif self.cfg.method == "sift":
+            self.detector = cv2.SIFT_create(
+                nOctaveLayers=self.cfg.sift_n_octave_layers,
+                contrastThreshold=self.cfg.sift_contrast_threshold,
+                edgeThreshold=int(self.cfg.sift_edge_threshold),
+            )
+            self.extractor = cv2.SIFT_create(
+                nOctaveLayers=self.cfg.sift_n_octave_layers,
+                contrastThreshold=self.cfg.sift_contrast_threshold,
+                edgeThreshold=int(self.cfg.sift_edge_threshold),
+            )
+            self.matcher = cv2.BFMatcher(cv2.NORM_L2)
+        elif self.cfg.method == "sparseOptFlow":
+            self.feature_params = dict(
+                maxCorners=self.cfg.sof_max_corners,
+                qualityLevel=self.cfg.sof_quality_level,
+                minDistance=self.cfg.sof_min_distance,
+                blockSize=self.cfg.sof_block_size,
+                useHarrisDetector=self.cfg.sof_use_harris,
+                k=self.cfg.sof_k,
+            )
 
         self.reset()
 
@@ -191,16 +241,20 @@ def reset(self) -> None:
 
         # SparseOptFlow state
         self._prev_frame_gray: Optional[np.ndarray] = None
-        self._prev_points: Optional[np.ndarray] = None  # shape (N,1,2) from goodFeaturesToTrack
 
-    def estimate(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
+        # shape (N,1,2) from goodFeaturesToTrack
+        self._prev_points: Optional[np.ndarray] = None  
+
+    def estimate(self, frame_bgr: np.ndarray, 
+                 dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
         """
         Estimate global affine transform H (2x3) from previous frame to current frame.
 
         Args:
             frame_bgr: Current frame in BGR format (uint8), shape (H, W, 3).
-            dets_xyxy: Optional detections (N,4) in xyxy format, in original image scale.
-                Used only by ORB method for masking out object regions (background-only features).
+            dets_xyxy: Optional detections (N,4) in xyxy format, in original image
+                scale. Used only by the ORB and SIFT methods for masking out object
+                regions (background-only features).
 
         Returns:
             H: Affine transform matrix of shape (2, 3), dtype float32.
@@ -209,8 +263,8 @@ def estimate(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None
         if frame_bgr is None:
             return np.eye(2, 3, dtype=np.float32)
 
-        if self.cfg.method == "orb":
-            return self._estimate_orb(frame_bgr, dets_xyxy)
+        if self.cfg.method == "orb" or self.cfg.method == "sift":
+            return self._estimate_feature_affine(frame_bgr, dets_xyxy)
 
         if self.cfg.method == "sparseOptFlow":
             return self._estimate_sparse_optflow(frame_bgr)
@@ -218,14 +272,17 @@ def estimate(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None
         # fallback
         return np.eye(2, 3, dtype=np.float32)
 
-    def _estimate_orb(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
+    def _estimate_feature_affine(self, frame_bgr: np.ndarray, 
+                      dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
         """
-        ORB-based affine estimation.
+        Feature affine estimation. ORB-based or SIFT-based 
+        (different initializations of self.detector, self.extractor and self.matcher for
+        ORB and SIFT)
 
         Steps:
             1) Convert to grayscale (+ optional downscale).
             2) Create ROI mask and optionally mask out detections (background emphasis).
-            3) Detect FAST keypoints and compute ORB descriptors.
+            3) Detect keypoints (FAST for ORB, SIFT for SIFT) and compute descriptors.
             4) KNN match descriptors against previous frame (ratio test).
             5) Filter matches by max spatial displacement and by 2.5*std inliers.
             6) Estimate affine transform with RANSAC.
@@ -256,7 +313,7 @@ def _estimate_orb(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] =
             dets = np.asarray(dets_xyxy, dtype=np.float32) / float(self.downscale)
             dets = dets.astype(np.int32)
 
-            # Safety clipping
+            # Safety clipping to avoid negative/out-of-bounds slicing
             dets[:, 0] = np.clip(dets[:, 0], 0, W - 1)
             dets[:, 2] = np.clip(dets[:, 2], 0, W - 1)
             dets[:, 1] = np.clip(dets[:, 1], 0, H - 1)
@@ -266,12 +323,13 @@ def _estimate_orb(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] =
                 if x2b > x1b and y2b > y1b:
                     mask[y1b:y2b, x1b:x2b] = 0
 
-        # Detect + describe
+        # Detect + describe (ORB or SIFT, depending on cfg.method)
         kps = self.detector.detect(gray, mask)
         kps, desc = self.extractor.compute(gray, kps)
 
         H_aff = np.eye(2, 3, dtype=np.float32)
 
+        # First frame init
         if not self._initialized:
             self._prev_kps = copy.copy(kps)
             self._prev_desc = None if desc is None else copy.copy(desc)
@@ -289,7 +347,8 @@ def _estimate_orb(self, frame_bgr: np.ndarray, dets_xyxy: Optional[np.ndarray] =
             self._prev_desc = copy.copy(desc)
             return H_aff
 
-        max_spatial = self.cfg.max_spatial_distance_frac * np.array([W, H], dtype=np.float32)
+        max_spatial = self.cfg.max_spatial_distance_frac * np.array([W, H], 
+                                                                    dtype=np.float32)
 
         prev_pts = []
         curr_pts = []
@@ -361,7 +420,8 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
 
         # Downscale
         if self.downscale > 1:
-            frame = cv2.resize(frame, (W_img // self.downscale, H_img // self.downscale))
+            frame = cv2.resize(frame, (W_img // self.downscale, 
+                                       H_img // self.downscale))
 
         # Find keypoints in current frame
         keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
@@ -381,7 +441,8 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
 
         # Optical flow correspondences
         # calcOpticalFlowPyrLK will throw or return nonsense if we give it None
-        matched, status, _err = cv2.calcOpticalFlowPyrLK(self._prev_frame_gray, frame, self._prev_points, None)
+        matched, status, _err = cv2.calcOpticalFlowPyrLK(self._prev_frame_gray, frame, 
+                                                         self._prev_points, None)
 
         if status is None or matched is None:
             self._prev_frame_gray = frame.copy()
@@ -403,7 +464,6 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
         curr_pts = np.array(curr_pts)
 
         # Find rigid matrix
-        # if (np.size(prev_pts, 0) > 4) and (np.size(prev_pts, 0) == np.size(prev_pts, 0)):
         if (np.size(prev_pts, 0) > 4) and (np.size(prev_pts, 0) == np.size(curr_pts, 0)):
             H_est, _ = cv2.estimateAffinePartial2D(prev_pts, curr_pts, cv2.RANSAC)
             if H_est is not None:
@@ -436,7 +496,8 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
             state := A * state + translation
             P     := A * P * A^T
 
-        Where A applies the 2x2 rotation/shear block to each 2D component block in the state.
+        Where A applies the 2x2 rotation/shear block to each 2D component block in the 
+        state.
 
         Args:
             tracks: List of track objects with `.state` and `.P` attributes.
@@ -460,7 +521,8 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
         A8[0:4, 0:4] = A4
         A8[4:8, 4:8] = A4
 
-        trans4 = np.array([t[0, 0], t[1, 0], t[0, 0], t[1, 0]], dtype=np.float32).reshape(4, 1)
+        trans4 = np.array([t[0, 0], t[1, 0], t[0, 0], t[1, 0]], 
+                          dtype=np.float32).reshape(4, 1)
 
         for trk in tracks:
             trk.state = (A8 @ trk.state).astype(np.float32)
diff --git a/trackers/core/botsort/tracker.py b/trackers/core/botsort/tracker.py
index c49808a2..05717551 100644
--- a/trackers/core/botsort/tracker.py
+++ b/trackers/core/botsort/tracker.py
@@ -23,8 +23,8 @@ class BoTSORTTracker(BaseTracker):
     """
     BoT-SORT-style multi-object tracker (IoU association + optional CMC).
 
-    The tracker maintains a list of active tracks (Kalman-filter-based) and, for each frame,
-    performs:
+    The tracker maintains a list of active tracks (Kalman-filter-based) and, for each 
+    frame, performs:
       1) Predict existing track states (Kalman predict)
       2) Split detections into high/low confidence groups
       3) Apply camera motion compensation to predicted tracks
@@ -33,17 +33,21 @@ class BoTSORTTracker(BaseTracker):
       6) Spawn new tracks from unmatched high-confidence detections
       7) Remove tracks that have been lost for too long
 
-    Parameters in __init__ control thresholds and lifecycle logic similarly to ByteTrack/BoT-SORT.
+    Parameters in __init__ control thresholds and lifecycle logic similarly to 
+    ByteTrack.
 
     Attributes:
         tracks: List of active `BoTSORTKalmanBoxTracker` objects.
-        maximum_frames_without_update: Max number of consecutive frames a track can go unmatched
-            before being removed.
-        minimum_consecutive_frames: Track maturity threshold before assigning a permanent ID.
+        maximum_frames_without_update: Max number of consecutive frames a track can go 
+            unmatched before being removed.
+        minimum_consecutive_frames: Track maturity threshold before assigning a 
+            permanent ID.
         minimum_iou_threshold: Minimum IoU required for a valid match.
         track_activation_threshold: Confidence threshold for spawning a new track.
-        high_conf_det_threshold: Confidence threshold splitting detections into high/low groups.
-        enable_cmc: Whether to run camera motion compensation each frame (if `cmc` is set).
+        high_conf_det_threshold: Confidence threshold splitting detections into 
+            high/low groups.
+        enable_cmc: Whether to run camera motion compensation each frame 
+            (if `cmc` is set).
         cmc: Camera motion compensation instance (or None if disabled).
     """
 
@@ -56,7 +60,6 @@ def __init__(
         minimum_iou_threshold: float = 0.1,
         high_conf_det_threshold: float = 0.6,
         enable_cmc: bool = True,
-        # cmc_method: str = "orb",
         cmc_method: str = "sparseOptFlow",
         cmc_downscale: int = 2,
 
@@ -65,19 +68,22 @@ def __init__(
         Initialize the tracker.
 
         Args:
-            lost_track_buffer: Time buffer (in frames at 30 FPS) for keeping lost tracks alive
-                before deletion. This is scaled by `frame_rate`.
-            frame_rate: Video frame rate used to scale the lost track buffer to time-like behavior.
-            track_activation_threshold: Minimum detection confidence to spawn a new track.
-            minimum_consecutive_frames: Number of successful updates required before assigning
-                a stable track ID (different than initial -1).
+            lost_track_buffer: Time buffer (in frames at 30 FPS) for keeping lost tracks
+                alive before deletion. This is scaled by `frame_rate`.
+            frame_rate: Video frame rate used to scale the lost track buffer to 
+                time-like behavior.
+            track_activation_threshold: Minimum detection confidence to spawn a new 
+                track.
+            minimum_consecutive_frames: Number of successful updates required before 
+                assigning a stable track ID (different than initial -1).
             minimum_iou_threshold: Minimum IoU to accept a detection-track association.
             high_conf_det_threshold: Confidence threshold used to split detections into:
                 - high confidence: confidence >= threshold
                 - low confidence:  confidence < threshold
             enable_cmc: Whether to enable camera motion compensation (CMC).
-            cmc_method: CMC method string passed into `CMCConfig(method=...)`. Supported values
-                depend on `CMC` (e.g. "orb", "sparseOptFlow"). See CMCConfig.
+            cmc_method: CMC method string passed into `CMCConfig(method=...)`. 
+                Supported values depend on `CMC` (e.g. "orb", "sift", "sparseOptFlow"). 
+                See CMCConfig.
             cmc_downscale: Downscale factor used inside CMC for speed/robustness.
 
         Notes:
@@ -96,7 +102,8 @@ def __init__(
         self.tracks: list[BoTSORTKalmanBoxTracker] = []
 
         self.enable_cmc = enable_cmc
-        self.cmc = CMC(CMCConfig(method=cmc_method, downscale=cmc_downscale)) if enable_cmc else None
+        self.cmc = CMC(CMCConfig(method=cmc_method, 
+                                 downscale=cmc_downscale)) if enable_cmc else None
 
     def _update_detections(
         self,
@@ -110,16 +117,17 @@ def _update_detections(
 
         For each (track_idx, det_idx) match:
         - Update the track’s Kalman state with the detection bbox.
-        - If the track is “mature” (>= minimum_consecutive_frames) and still has tracker_id == -1,
-          assign a new unique tracker ID.
-        - Create a single-row `sv.Detections` object for the matched detection and set its
-          tracker_id to the track ID (or -1 if not mature yet).
+        - If the track is “mature” (>= minimum_consecutive_frames) and still has 
+          tracker_id == -1, assign a new unique tracker ID.
+        - Create a single-row `sv.Detections` object for the matched detection and set 
+          its tracker_id to the track ID (or -1 if not mature yet).
         - Append it to `updated_detections`.
 
         Args:
             tracks: Tracks being updated.
             detections: Detections used for update.
-            updated_detections: Accumulator list of per-detection outputs for this frame.
+            updated_detections: Accumulator list of per-detection outputs for this 
+                frame.
             matched_indices: List of (track_row_index, detection_col_index) pairs.
 
         Returns:
@@ -155,34 +163,39 @@ def update(
         This is the main per-frame entry point.
 
         Args:
-            detections: Supervision detections for the current frame. Must include `.xyxy`.
-                Confidence (`detections.confidence`) is optional but recommended.
-                The method writes/overwrites `detections.tracker_id`.
-            frame: Current video frame in BGR format (H, W, 3), required if CMC is enabled.
+            detections: Supervision detections for the current frame. Must include
+                `.xyxy`. Confidence (`detections.confidence`) is optional but
+                recommended. The method writes/overwrites `detections.tracker_id`.
+            frame: Current video frame in BGR format (H, W, 3), required if CMC is 
+                enabled.
 
         Returns:
             A merged `sv.Detections` object containing detections from this frame with
             `tracker_id` assigned:
               - >= 0 indicates a confirmed track ID
-              - -1 indicates unconfirmed/untracked (e.g., new / low confidence / not yet mature)
+              - -1 indicates unconfirmed/untracked (e.g., new / low confidence / not yet
+                mature)
 
         Notes:
-            - If CMC is enabled, the tracker estimates a global affine transform (2x3) from the
-              frame and uses it to warp predicted track states before association.
+            - If CMC is enabled, the tracker estimates a global affine transform (2x3) 
+              from the frame and uses it to warp predicted track states before 
+              association.
         """
         if len(self.tracks) == 0 and len(detections) == 0:
             detections.tracker_id = np.array([], dtype=int)
             return detections
         updated_detections: list[
             sv.Detections
-        ] = []  # List for returning the updated detections with its new assigned track id # noqa: E501
+        ] = []  # List for returning the updated detections with its new assigned 
+                # track id # noqa: E501
 
         # Predict new locations for existing tracks
         for tracker in self.tracks:
             tracker.predict()
         # Assign a default tracker_id with the correct shape
         detections.tracker_id = -np.ones(len(detections))
-        # Split into high confidence boxes and lower based on self.high_conf_det_threshold # noqa: E501
+        # Split into high confidence boxes and lower based on 
+        # self.high_conf_det_threshold # noqa: E501
         high_prob_detections, low_prob_detections = (
             self._get_high_and_low_probability_detections(detections)
         )
@@ -291,8 +304,9 @@ def _get_associated_indices(
         assignment problem in an optimal way.
 
         Args:
-            similarity_matrix: Similarity matrix between tracks (rows) and detections (columns).
-            min_similarity_thresh: Minimum similarity threshold for a valid match.
+            similarity_matrix: Similarity matrix between tracks (rows) and
+                detections (columns).
+            min_similarity_thresh: Minimum similarity threshold for a valid match.
 
         Returns:
             Matched indices (list of (tracker_idx, detection_idx)), indices of
@@ -362,8 +376,9 @@ def _similarity_step(
         detections: sv.Detections,
         tracks: list[BoTSORTKalmanBoxTracker],
     ) -> tuple[list[tuple[int, int]], set[int], set[int]]:
-        """Measures similarity based on IoU between tracks and detections and returns the matches
-            and unmatched tracks/detections. Is used for step 1 and 2 of the BYTE algorithm.
+        """Measures similarity based on IoU between tracks and detections and returns 
+            the matches and unmatched tracks/detections. Is used for step 1 and 2 of the
+            BYTE algorithm.
 
         Args:
             detections: The set of object detections.
@@ -382,7 +397,8 @@ def _similarity_step(
         thresh = self.minimum_iou_threshold
 
         # Associate detections to tracks based on the higher value of the
-        # similarity matrix, using the Jonker-Volgenant algorithm (linear_sum_assignment). # noqa: E501
+        # similarity matrix, using the Jonker-Volgenant algorithm 
+        # (linear_sum_assignment). # noqa: E501
         matched_indices, unmatched_tracks, unmatched_detections = (
             self._get_associated_indices(similarity_matrix, thresh)
         )

From 1db6826f346b1fc88c87f1b79075355bb13cd6fc Mon Sep 17 00:00:00 2001
From: Tomasz Stanczyk <stanior666@gmail.com>
Date: Tue, 10 Mar 2026 15:30:10 +0100
Subject: [PATCH 04/14] Adjust Kalman filter as in original BoT-SORT

---
 trackers/core/botsort/cmc.py                | 126 ++++--
 trackers/core/botsort/kalman_box_tracker.py | 433 ++++++++++++++++----
 2 files changed, 460 insertions(+), 99 deletions(-)

diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
index 5db0cb74..0fc16f72 100644
--- a/trackers/core/botsort/cmc.py
+++ b/trackers/core/botsort/cmc.py
@@ -302,6 +302,7 @@ def _estimate_feature_affine(self, frame_bgr: np.ndarray,
             gray = cv2.resize(gray, (W_img // self.downscale, H_img // self.downscale))
         H, W = gray.shape[:2]
 
+        # Build mask: central ROI + remove detections (background features)
         mask = np.zeros_like(gray, dtype=np.uint8)
         y0 = int(self.cfg.roi_min_frac * H)
         y1 = int(self.cfg.roi_max_frac * H)
@@ -486,45 +487,120 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
     @staticmethod
     def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
         """
-        Apply affine transform H (2x3) to tracker states and covariances in-place.
+        Apply a global affine motion transform to tracker states and covariances in-place.
 
-        This implementation assumes each track has:
-            - `state`: (8,1) float vector [x1,y1,x2,y2,vx,vy,vx2,vy2]^T
-            - `P`: (8,8) covariance matrix
+        This method updates each track according to the affine transform
 
-        The transform is applied as:
-            state := A * state + translation
-            P     := A * P * A^T
+            x' = R x + t
 
-        Where A applies the 2x2 rotation/shear block to each 2D component block in the 
-        state.
+        where:
+            R:
+                2x2 linear part of the affine transform (rotation / shear / scale-like part).
+            t:
+                2D translation vector.
+
+        The input transform `H` is expected in standard OpenCV affine form:
+
+            H = [ R | t ]
+
+        with shape (2, 3).
+
+        Tracker state convention:
+            Each track is assumed to store its Kalman state as
+
+                [xc, yc, w, h, vxc, vyc, vw, vh]^T
+
+            where:
+                xc, yc:
+                    Bounding box center coordinates.
+                w, h:
+                    Bounding box width and height.
+                vxc, vyc:
+                    Velocities of the center coordinates.
+                vw, vh:
+                    Velocities of the width and height.
+
+        State update logic:
+            The affine transform is applied only to the geometric quantities that live in
+            the 2D image plane as position or velocity vectors:
+
+            1) Center position:
+                   [xc, yc]^T = R @ [xc, yc]^T + t
+
+            2) Center velocity:
+                   [vxc, vyc]^T = R @ [vxc, vyc]^T
+
+            3) Width, height, and their velocities:
+                   [w, h, vw, vh] remain unchanged
+
+        Why width and height are not transformed here:
+            Width and height are scalar box dimensions, not 2D point coordinates.
+            In this implementation, camera motion compensation is used to correct the
+            object center location and its image-plane velocity, while the box size terms
+            are left unchanged. This keeps the compensation simple and consistent with the
+            state representation used by the tracker.
+
+        Covariance update:
+            Each track also stores a covariance matrix `P` describing uncertainty in the
+            8D Kalman state. After the mean state is transformed, the covariance is updated
+            using the linear transform
+
+                P = A @ P @ A.T
+
+            where `A` is an 8x8 block matrix that applies `R` to:
+                - the center position block [xc, yc]
+                - the center velocity block [vxc, vyc]
+
+            and leaves the remaining state dimensions unchanged.
+
+            Concretely:
+                - A[0:2, 0:2] = R
+                - A[4:6, 4:6] = R
+                - all other diagonal entries remain 1
 
         Args:
-            tracks: List of track objects with `.state` and `.P` attributes.
-            H: Affine transform (2,3) mapping prev -> curr.
+            tracks:
+                List of track objects. Each track is expected to expose:
+                    - `state`: NumPy array of shape (8, 1)
+                    - `P`: NumPy array of shape (8, 8)
+            H:
+                Affine transform matrix of shape (2, 3), mapping previous-frame image
+                coordinates to current-frame image coordinates.
 
         Returns:
-            None. Tracks are modified in-place.
+            None.
+            The tracks are modified in-place.
+
+        Notes:
+            - If `H` is None or `tracks` is empty, this method does nothing.
+            - The method assumes that `H` has already been estimated in image coordinates
+              consistent with the tracker state.
+            - This method does not perform any validity checks on whether the estimated
+              transform is physically plausible; it simply applies the provided transform.
         """
         if H is None or len(tracks) == 0:
             return
 
         H = H.astype(np.float32)
         R = H[:2, :2]
-        t = H[:2, 2:3]  # (2,1)
+        t = H[:2, 2]
 
-        A4 = np.zeros((4, 4), dtype=np.float32)
-        A4[0:2, 0:2] = R
-        A4[2:4, 2:4] = R
+        for trk in tracks:
+            x = trk.state.reshape(-1)
 
-        A8 = np.zeros((8, 8), dtype=np.float32)
-        A8[0:4, 0:4] = A4
-        A8[4:8, 4:8] = A4
+            # Update the state mean using the affine transform.
+            pos = x[0:2]
+            vel = x[4:6]
 
-        trans4 = np.array([t[0, 0], t[1, 0], t[0, 0], t[1, 0]], 
-                          dtype=np.float32).reshape(4, 1)
+            x[0:2] = R @ pos + t
+            x[4:6] = R @ vel
 
-        for trk in tracks:
-            trk.state = (A8 @ trk.state).astype(np.float32)
-            trk.state[0:4] += trans4
-            trk.P = (A8 @ trk.P @ A8.T).astype(np.float32)
\ No newline at end of file
+            trk.state = x.reshape(8, 1).astype(np.float32)
+
+            # Update the state covariance under the corresponding linear transform.
+            A = np.eye(8, dtype=np.float32)
+            A[0:2, 0:2] = R      # center position
+            A[4:6, 4:6] = R      # center velocity
+            # Box size terms (w, h, vw, vh) are not transformed in this implementation.
+
+            trk.P = (A @ trk.P @ A.T).astype(np.float32)
\ No newline at end of file
diff --git a/trackers/core/botsort/kalman_box_tracker.py b/trackers/core/botsort/kalman_box_tracker.py
index dc19df67..90cc04b2 100644
--- a/trackers/core/botsort/kalman_box_tracker.py
+++ b/trackers/core/botsort/kalman_box_tracker.py
@@ -6,119 +6,412 @@
 
 import numpy as np
 
+
 class BoTSORTKalmanBoxTracker:
     """
-    The `BoTSORTKalmanBoxTracker` class represents the internals of a single
-    tracked object (bounding box), with a Kalman filter to predict and update
-    its position.
-
-    Attributes:
-        tracker_id: Unique identifier for the tracker.
-        number_of_successful_updates: Number of times the object has been
-            updated successfully.
-        time_since_update: Number of frames since the last update.
-        state: State vector of the bounding box.
-        F: State transition matrix.
-        H: Measurement matrix.
-        Q: Process noise covariance matrix.
-        R: Measurement noise covariance matrix.
-        P: Error covariance matrix.
-        count_id: Class variable to assign unique IDs to each tracker.
-
-    Args:
-        bbox: Initial bounding box in the form [x1, y1, x2, y2].
+    Kalman-filter-based state estimator for a single tracked object.
+
+    This class maintains the motion state of one object using a linear Kalman filter
+    with a constant-velocity model. The tracker stores the object state internally in
+    center-width-height form, but accepts detections and returns boxes in standard
+    corner format.
+
+    Internal state vector:
+        [xc, yc, w, h, vxc, vyc, vw, vh]^T
+
+    where:
+        xc, yc:
+            Bounding box center coordinates.
+        w, h:
+            Bounding box width and height.
+        vxc, vyc:
+            Velocities of the center coordinates.
+        vw, vh:
+            Velocities of the width and height.
+
+    Public input/output convention:
+        - input detections to `__init__()` and `update()` are expected in xyxy format:
+          [x1, y1, x2, y2]
+        - output from `get_state_bbox()` is returned in xyxy format:
+          [x1, y1, x2, y2]
+
+    Kalman filter matrices used in this class:
+        F:
+            State transition matrix. Propagates the state from one frame to the next
+            under a constant-velocity assumption.
+        H:
+            Measurement matrix. Maps the internal 8D state to the observable 4D
+            measurement space [xc, yc, w, h].
+        Q:
+            Process noise covariance. Models uncertainty in the motion model used
+            during prediction.
+        R:
+            Measurement noise covariance. Models uncertainty in incoming detections
+            during the update step.
+        P:
+            State covariance matrix. Represents the current uncertainty of the full
+            8D state estimate.
+
+    Lifecycle-related attributes:
+        tracker_id:
+            Permanent track identifier. Starts at -1 and is assigned later by the
+            outer tracking logic once the track is considered mature.
+        number_of_successful_updates:
+            Number of successful detection-based updates received by this track.
+        time_since_update:
+            Number of consecutive prediction steps since the last measurement update.
+
+    Notes:
+        - The process and measurement noise are scaled using the current object width
+          and height. This makes the uncertainty proportional to object size.
+        - Width and height are constrained to remain positive after prediction and
+          update to avoid degenerate boxes.
     """
 
     count_id = 0
 
     @classmethod
     def get_next_tracker_id(cls) -> int:
-        """
-        Class method that returns the next available tracker ID.
-
-        Returns:
-            The next available tracker ID.
-        """
         next_id = cls.count_id
         cls.count_id += 1
         return next_id
 
     def __init__(self, bbox: np.ndarray):
-        # Initialize with a temporary ID of -1
-        # Will be assigned a real ID when the track is considered mature
-        self.tracker_id = -1
+        """
+        Initialize a new track from the first observed bounding box.
+
+        Args:
+            bbox:
+                Initial detection in xyxy format: [x1, y1, x2, y2].
+
+        Initialization steps:
+            1) Set track-management attributes such as `tracker_id`,
+               `number_of_successful_updates`, and `time_since_update`.
+            2) Allocate the internal 8D Kalman state vector:
+                   [xc, yc, w, h, vxc, vyc, vw, vh]^T
+            3) Convert the input bounding box from xyxy to xywh form:
+                   [xc, yc, w, h]
+            4) Store that measurement in the position/size part of the state.
+            5) Initialize the Kalman filter matrices F, H, Q, R, and P.
 
-        # Number of hits indicates how many times the object has been
-        # updated successfully
+        Notes:
+            - Initial velocities are set to zero.
+            - The initial covariance matrix P is set in `_initialize_kalman_filter()`
+              and reflects uncertainty about both position/size and velocity.
+        """
+        self.tracker_id = -1
         self.number_of_successful_updates = 1
-        # Number of frames since the last update
         self.time_since_update = 0
 
-        # For simplicity, we keep a small state vector:
-        # (x, y, x2, y2, vx, vy, vx2, vy2).
-        # We'll store the bounding box in "self.state"
+        # State mean: [xc, yc, w, h, vxc, vyc, vw, vh]^T
         self.state = np.zeros((8, 1), dtype=np.float32)
 
-        # Initialize state directly from the first detection
-        self.state[0] = bbox[0]
-        self.state[1] = bbox[1]
-        self.state[2] = bbox[2]
-        self.state[3] = bbox[3]
+        # Initialize from first detection in xyxy
+        measurement = self.xyxy_to_xywh(bbox)
+        self.state[0:4, 0] = measurement
 
-        # Basic constant velocity model
-        self._initialize_kalman_filter()
+        self._initialize_kalman_filter(measurement)
 
-    def _initialize_kalman_filter(self) -> None:
+    @staticmethod
+    def xyxy_to_xywh(bbox: np.ndarray) -> np.ndarray:
         """
-        Sets up the matrices for the Kalman filter.
+        Convert a bounding box from corner format to center-size format.
+
+        Args:
+            bbox:
+                Bounding box in xyxy format: [x1, y1, x2, y2].
+
+        Returns:
+            Bounding box in xywh format: [xc, yc, w, h].
+        """
+        x1, y1, x2, y2 = bbox.astype(np.float32)
+        w = x2 - x1
+        h = y2 - y1
+        xc = x1 + w / 2.0
+        yc = y1 + h / 2.0
+        return np.array([xc, yc, w, h], dtype=np.float32)
+
+    @staticmethod
+    def xywh_to_xyxy(state_xywh: np.ndarray) -> np.ndarray:
+        """
+        Convert a bounding box from center-size format to corner format.
+
+        Args:
+            state_xywh:
+                Bounding box in xywh format: [xc, yc, w, h].
+
+        Returns:
+            Bounding box in xyxy format: [x1, y1, x2, y2].
+        """
+        xc, yc, w, h = state_xywh.astype(np.float32)
+        x1 = xc - w / 2.0
+        y1 = yc - h / 2.0
+        x2 = xc + w / 2.0
+        y2 = yc + h / 2.0
+        return np.array([x1, y1, x2, y2], dtype=np.float32)
+
+    def _initialize_kalman_filter(self, measurement: np.ndarray) -> None:
+        """
+        Initialize the Kalman filter matrices for the current track.
+
+        Args:
+            measurement:
+                Initial object measurement in xywh format:
+                [xc, yc, w, h].
+
+        This method initializes the following matrices:
+
+        State transition matrix:
+            F is an 8x8 matrix defining how the state evolves from one frame to the next.
+            It implements a constant-velocity model:
+                xc <- xc + vxc
+                yc <- yc + vyc
+                w  <- w  + vw
+                h  <- h  + vh
+            while the velocity terms are carried forward unchanged.
+
+        Measurement matrix:
+            H is a 4x8 matrix mapping the internal 8D state
+                [xc, yc, w, h, vxc, vyc, vw, vh]^T
+            to the observable 4D measurement
+                [xc, yc, w, h]^T.
+            In other words, only the first four state components are directly observed.
+
+        Process noise covariance:
+            Q is an 8x8 diagonal matrix representing uncertainty in the motion model
+            used during prediction. Larger values allow the predicted state to change
+            more freely from frame to frame.
+
+        Measurement noise covariance:
+            R is a 4x4 diagonal matrix representing uncertainty in the detector
+            measurements used during correction/update.
+
+        State covariance:
+            P is the initial 8x8 covariance matrix representing uncertainty in the
+            initial state estimate. The velocity terms use a larger multiple of their
+            base noise scale (10 * sigma_v vs. 2 * sigma_p for position/size) because
+            they are not directly observed in the first frame.
+
+        Noise scaling:
+            The diagonal entries of Q, R, and P are scaled using the initial object
+            width and height. This makes the uncertainty proportional to object size:
+            larger objects are allowed proportionally larger absolute motion and noise.
+
+        Notes:
+            - `sigma_p` controls the scale of position/size process noise.
+            - `sigma_v` controls the scale of velocity process noise.
+            - `sigma_m` controls the scale of measurement noise.
+            - All covariance matrices are diagonal in this implementation.
         """
-        # State transition matrix (F): 8x8
-        # We assume a constant velocity model. Positions are incremented by
-        # velocity each step.
         self.F = np.eye(8, dtype=np.float32)
         for i in range(4):
             self.F[i, i + 4] = 1.0
 
-        # Measurement matrix (H): we directly measure x1, y1, x2, y2
-        self.H = np.eye(4, 8, dtype=np.float32)  # 4x8
+        self.H = np.eye(4, 8, dtype=np.float32)
+
+        # BoT-SORT-style scale-aware noise using width/height. 
+        sigma_p = 0.05
+        sigma_v = 0.00625
+        sigma_m = 0.05
 
-        # Process covariance matrix (Q)
-        self.Q = np.eye(8, dtype=np.float32) * 0.01
+        w, h = measurement[2], measurement[3]
+
+        q_diag = np.array([
+            (sigma_p * w) ** 2,
+            (sigma_p * h) ** 2,
+            (sigma_p * w) ** 2,
+            (sigma_p * h) ** 2,
+            (sigma_v * w) ** 2,
+            (sigma_v * h) ** 2,
+            (sigma_v * w) ** 2,
+            (sigma_v * h) ** 2,
+        ], dtype=np.float32)
+        self.Q = np.diag(q_diag)
+
+        r_diag = np.array([
+            (sigma_m * w) ** 2,
+            (sigma_m * h) ** 2,
+            (sigma_m * w) ** 2,
+            (sigma_m * h) ** 2,
+        ], dtype=np.float32)
+        self.R = np.diag(r_diag)
+
+        # Initial covariance, as in original BoT-SORT KF
+        p_diag = np.array([
+            (2 * sigma_p * w) ** 2,
+            (2 * sigma_p * h) ** 2,
+            (2 * sigma_p * w) ** 2,
+            (2 * sigma_p * h) ** 2,
+            (10 * sigma_v * w) ** 2,
+            (10 * sigma_v * h) ** 2,
+            (10 * sigma_v * w) ** 2,
+            (10 * sigma_v * h) ** 2,
+        ], dtype=np.float32)
+        self.P = np.diag(p_diag)
+
+    def _update_process_and_measurement_noise(self) -> None:
+        """
+        Recompute the process and measurement noise covariances from the current box size.
 
-        # Measurement covariance (R): noise in detection
-        self.R = np.eye(4, dtype=np.float32) * 0.1
+        This method updates:
 
-        # Error covariance matrix (P)
-        self.P = np.eye(8, dtype=np.float32)
+        Q:
+            Process noise covariance, used in the prediction step.
+            It models uncertainty in how the state changes from one frame to the next.
+
+        R:
+            Measurement noise covariance, used in the update step.
+            It models uncertainty in the current detection measurement.
+
+        Why this update is needed:
+            The scale of the uncertainty should depend on the current object size.
+            For example, a 2-pixel error is relatively more important for a small object
+            than for a large one. Therefore, the diagonal entries of Q and R are computed
+            from the current predicted width and height stored in the state.
+
+        Implementation details:
+            - Width and height are read from the current state:
+                  w = state[2], h = state[3]
+            - They are clamped to a small positive minimum to avoid zero or negative values.
+            - The resulting Q and R matrices remain diagonal.
+
+        Notes:
+            This method does not update P directly. It only refreshes the noise models
+            used later in `predict()` and `update()`.
+        """
+        sigma_p = 0.05
+        sigma_v = 0.00625
+        sigma_m = 0.05
+
+        w = max(float(self.state[2, 0]), 1e-3)
+        h = max(float(self.state[3, 0]), 1e-3)
+
+        q_diag = np.array([
+            (sigma_p * w) ** 2,
+            (sigma_p * h) ** 2,
+            (sigma_p * w) ** 2,
+            (sigma_p * h) ** 2,
+            (sigma_v * w) ** 2,
+            (sigma_v * h) ** 2,
+            (sigma_v * w) ** 2,
+            (sigma_v * h) ** 2,
+        ], dtype=np.float32)
+        self.Q = np.diag(q_diag)
+
+        r_diag = np.array([
+            (sigma_m * w) ** 2,
+            (sigma_m * h) ** 2,
+            (sigma_m * w) ** 2,
+            (sigma_m * h) ** 2,
+        ], dtype=np.float32)
+        self.R = np.diag(r_diag)
 
     def predict(self) -> None:
         """
-        Predict the next state of the bounding box (applies the state transition).
+        Predict the next state and covariance using the Kalman motion model.
+
+        This method performs the Kalman filter prediction step:
+
+            state <- F @ state
+            P     <- F @ P @ F.T + Q
+
+        where:
+            F:
+                State transition matrix.
+            P:
+                Current state covariance matrix.
+            Q:
+                Process noise covariance.
+
+        Effect of the prediction:
+            - The center position and box size are advanced using their current velocities.
+            - The covariance matrix P is propagated forward and increased by Q to reflect
+              additional uncertainty introduced during motion prediction.
+
+        Additional behavior:
+            - The process and measurement noise matrices are refreshed first by calling
+              `_update_process_and_measurement_noise()`.
+            - Width and height are clamped to remain positive after prediction.
+            - `time_since_update` is incremented because this frame has not yet received
+              a measurement update.
+
+        Notes:
+            This method does not use any detection input. It only extrapolates the track
+            state forward in time.
         """
+        self._update_process_and_measurement_noise()
+
         # Predict state
         self.state = self.F @ self.state
-        # Predict error covariance
+
+        # Predict error (uncertainty) covariance
         self.P = self.F @ self.P @ self.F.T + self.Q
 
+        # Prevent degenerate box shape
+        self.state[2, 0] = max(self.state[2, 0], 1e-3)
+        self.state[3, 0] = max(self.state[3, 0], 1e-3)
+
         # Increase time since update
         self.time_since_update += 1
 
     def update(self, bbox: np.ndarray) -> None:
         """
-        Updates the state with a new detected bounding box.
+        Correct the predicted state using a new detection.
 
         Args:
-            bbox: Detected bounding box in the form [x1, y1, x2, y2].
+            bbox:
+                Detection bounding box in xyxy format: [x1, y1, x2, y2].
+
+        This method performs the Kalman filter correction/update step:
+
+            measurement = xyxy_to_xywh(bbox)
+            S = H @ P @ H.T + R
+            K = P @ H.T @ inv(S)
+            y = measurement - H @ state
+            state = state + K @ y
+            P = (I - K @ H) @ P
+
+        where:
+            measurement:
+                Observed bounding box converted to [xc, yc, w, h].
+            S:
+                Innovation covariance. Represents uncertainty in the predicted
+                measurement.
+            K:
+                Kalman gain. Controls how strongly the state is corrected toward
+                the new measurement.
+            y:
+                Innovation (also called residual), i.e. the difference between the
+                observed measurement and the predicted measurement.
+            I:
+                Identity matrix of appropriate size.
+
+        Effect of the update:
+            - The predicted state is corrected toward the observed detection.
+            - The covariance matrix P is reduced to reflect increased confidence
+              after receiving a measurement.
+
+        Additional behavior:
+            - `time_since_update` is reset to zero.
+            - `number_of_successful_updates` is incremented.
+            - Width and height are clamped to remain positive after correction.
+
+        Notes:
+            The measurement only directly observes [xc, yc, w, h], not the velocity
+            terms. However, the velocity estimates can still change indirectly through
+            the Kalman gain and the state covariance structure.
         """
         self.time_since_update = 0
         self.number_of_successful_updates += 1
 
+        measurement = self.xyxy_to_xywh(bbox).reshape((4, 1))
+        self._update_process_and_measurement_noise()
+
         # Kalman Gain
         S = self.H @ self.P @ self.H.T + self.R
         K = self.P @ self.H.T @ np.linalg.inv(S)
 
-        # Residual
-        measurement = bbox.reshape((4, 1))
+        # Innovation (residual)
         y = measurement - self.H @ self.state
 
         # Update state
@@ -128,19 +421,11 @@ def update(self, bbox: np.ndarray) -> None:
         identity_matrix = np.eye(8, dtype=np.float32)
         self.P = (identity_matrix - K @ self.H) @ self.P
 
+        self.state[2, 0] = max(self.state[2, 0], 1e-3)
+        self.state[3, 0] = max(self.state[3, 0], 1e-3)
+
     def get_state_bbox(self) -> np.ndarray:
         """
-        Returns the current bounding box estimate from the state vector.
-
-        Returns:
-            The bounding box [x1, y1, x2, y2].
-        """
-        return np.array(
-            [
-                self.state[0],  # x1
-                self.state[1],  # y1
-                self.state[2],  # x2
-                self.state[3],  # y2
-            ],
-            dtype=float,
-        ).reshape(-1)
+        Return current predicted box in xyxy format.
+        """
+        return self.xywh_to_xyxy(self.state[0:4, 0])
\ No newline at end of file

From 809cf3013de65750ca5e50d8a344e2f99c58338a Mon Sep 17 00:00:00 2001
From: Tomasz Stanczyk <stanior666@gmail.com>
Date: Tue, 10 Mar 2026 15:38:52 +0100
Subject: [PATCH 05/14] Adjust comment line lengths

---
 trackers/core/botsort/cmc.py                | 27 ++++++++++++---------
 trackers/core/botsort/kalman_box_tracker.py | 21 +++++++++-------
 2 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
index 0fc16f72..45ddeeb9 100644
--- a/trackers/core/botsort/cmc.py
+++ b/trackers/core/botsort/cmc.py
@@ -487,7 +487,8 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
     @staticmethod
     def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
         """
-        Apply a global affine motion transform to tracker states and covariances in-place.
+        Apply a global affine motion transform to tracker states and covariances 
+        in-place.
 
         This method updates each track according to the affine transform
 
@@ -495,7 +496,8 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
 
         where:
             R:
-                2x2 linear part of the affine transform (rotation / shear / scale-like part).
+                2x2 linear part of the affine transform (rotation / shear / scale-like 
+                part).
             t:
                 2D translation vector.
 
@@ -521,8 +523,8 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
                     Velocities of the width and height.
 
         State update logic:
-            The affine transform is applied only to the geometric quantities that live in
-            the 2D image plane as position or velocity vectors:
+            The affine transform is applied only to the geometric quantities that live 
+            in the 2D image plane as position or velocity vectors:
 
             1) Center position:
                    [xc, yc]^T = R @ [xc, yc]^T + t
@@ -536,14 +538,14 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
         Why width and height are not transformed here:
             Width and height are scalar box dimensions, not 2D point coordinates.
             In this implementation, camera motion compensation is used to correct the
-            object center location and its image-plane velocity, while the box size terms
-            are left unchanged. This keeps the compensation simple and consistent with the
-            state representation used by the tracker.
+            object center location and its image-plane velocity, while the box size 
+            terms are left unchanged. This keeps the compensation simple and consistent 
+            with the state representation used by the tracker.
 
         Covariance update:
             Each track also stores a covariance matrix `P` describing uncertainty in the
-            8D Kalman state. After the mean state is transformed, the covariance is updated
-            using the linear transform
+            8D Kalman state. After the mean state is transformed, the covariance is 
+            updated using the linear transform
 
                 P = A @ P @ A.T
 
@@ -573,10 +575,11 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
 
         Notes:
             - If `H` is None or `tracks` is empty, this method does nothing.
-            - The method assumes that `H` has already been estimated in image coordinates
-              consistent with the tracker state.
+            - The method assumes that `H` has already been estimated in image 
+              coordinates consistent with the tracker state.
             - This method does not perform any validity checks on whether the estimated
-              transform is physically plausible; it simply applies the provided transform.
+              transform is physically plausible; it simply applies the provided 
+              transform.
         """
         if H is None or len(tracks) == 0:
             return
diff --git a/trackers/core/botsort/kalman_box_tracker.py b/trackers/core/botsort/kalman_box_tracker.py
index 90cc04b2..91b1a409 100644
--- a/trackers/core/botsort/kalman_box_tracker.py
+++ b/trackers/core/botsort/kalman_box_tracker.py
@@ -162,8 +162,8 @@ def _initialize_kalman_filter(self, measurement: np.ndarray) -> None:
         This method initializes the following matrices:
 
         State transition matrix:
-            F is an 8x8 matrix defining how the state evolves from one frame to the next.
-            It implements a constant-velocity model:
+            F is an 8x8 matrix defining how the state evolves from one frame to the 
+            next. It implements a constant-velocity model:
                 xc <- xc + vxc
                 yc <- yc + vyc
                 w  <- w  + vw
@@ -251,7 +251,8 @@ def _initialize_kalman_filter(self, measurement: np.ndarray) -> None:
 
     def _update_process_and_measurement_noise(self) -> None:
         """
-        Recompute the process and measurement noise covariances from the current box size.
+        Recompute the process and measurement noise covariances from the current box 
+        size.
 
         This method updates:
 
@@ -266,13 +267,14 @@ def _update_process_and_measurement_noise(self) -> None:
         Why this update is needed:
             The scale of the uncertainty should depend on the current object size.
             For example, a 2-pixel error is relatively more important for a small object
-            than for a large one. Therefore, the diagonal entries of Q and R are computed
-            from the current predicted width and height stored in the state.
+            than for a large one. Therefore, the diagonal entries of Q and R are 
+            computed from the current predicted width and height stored in the state.
 
         Implementation details:
             - Width and height are read from the current state:
                   w = state[2], h = state[3]
-            - They are clamped to a small positive minimum to avoid zero or negative values.
+            - They are clamped to a small positive minimum to avoid zero or negative 
+              values.
             - The resulting Q and R matrices remain diagonal.
 
         Notes:
@@ -324,9 +326,10 @@ def predict(self) -> None:
                 Process noise covariance.
 
         Effect of the prediction:
-            - The center position and box size are advanced using their current velocities.
-            - The covariance matrix P is propagated forward and increased by Q to reflect
-              additional uncertainty introduced during motion prediction.
+            - The center position and box size are advanced using their current 
+              velocities.
+            - The covariance matrix P is propagated forward and increased by Q to 
+              reflect additional uncertainty introduced during motion prediction.
 
         Additional behavior:
             - The process and measurement noise matrices are refreshed first by calling

From 8cdf29dd639faf27386771d5e5925279654f45e5 Mon Sep 17 00:00:00 2001
From: Tomasz Stanczyk <stanior666@gmail.com>
Date: Tue, 10 Mar 2026 15:46:09 +0100
Subject: [PATCH 06/14] Remove the bad quotation sign from docs

---
 trackers/core/botsort/cmc.py     | 2 +-
 trackers/core/botsort/tracker.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
index 45ddeeb9..6243c159 100644
--- a/trackers/core/botsort/cmc.py
+++ b/trackers/core/botsort/cmc.py
@@ -60,7 +60,7 @@ class CMCConfig:
 
         ransac_reproj_threshold:
             (ORB only) RANSAC reprojection threshold in pixels passed to
-            OpenCV’s affine estimation. It controls how far a point is allowed to 
+            OpenCV's affine estimation. It controls how far a point is allowed to 
             deviate from the estimated model while still being counted as an inlier.
             Smaller values are stricter (reject more matches); larger values are more 
             tolerant.
diff --git a/trackers/core/botsort/tracker.py b/trackers/core/botsort/tracker.py
index 05717551..fa2db77a 100644
--- a/trackers/core/botsort/tracker.py
+++ b/trackers/core/botsort/tracker.py
@@ -116,7 +116,7 @@ def _update_detections(
         Apply matched detection updates to tracks and append corresponding outputs.
 
         For each (track_idx, det_idx) match:
-        - Update the track’s Kalman state with the detection bbox.
+        - Update the track's Kalman state with the detection bbox.
         - If the track is “mature” (>= minimum_consecutive_frames) and still has 
           tracker_id == -1, assign a new unique tracker ID.
         - Create a single-row `sv.Detections` object for the matched detection and set 

From 504b7b58576a03fb13c5c8795b4b562457317c2d Mon Sep 17 00:00:00 2001
From: Tomasz Stanczyk <stanior666@gmail.com>
Date: Mon, 16 Mar 2026 14:39:00 +0100
Subject: [PATCH 07/14] Add ECC camera motion compensation

---
 trackers/core/botsort/cmc.py     | 121 ++++++++++++++++++++++++++++++-
 trackers/core/botsort/tracker.py |   4 +-
 2 files changed, 119 insertions(+), 6 deletions(-)

diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
index 6243c159..e480226b 100644
--- a/trackers/core/botsort/cmc.py
+++ b/trackers/core/botsort/cmc.py
@@ -11,7 +11,7 @@
 import numpy as np
 import cv2
 
-CMCTMethod = Literal["orb", "sift", "sparseOptFlow"]
+CMCTMethod = Literal["orb", "sift", "sparseOptFlow", "ecc"]
 
 @dataclass
 class CMCConfig:
@@ -40,6 +40,10 @@ class CMCConfig:
             - "sparseOptFlow": Sparse optical flow using corner tracking:
               goodFeaturesToTrack -> calcOpticalFlowPyrLK -> robust affine estimation 
               (RANSAC).
+            - "ecc": Global image alignment using the Enhanced Correlation Coefficient
+              (ECC) optimization method. This estimates a 2D Euclidean transform
+              directly from grayscale image intensities rather than from sparse feature
+              correspondences.
 
         downscale:
             Integer downscale factor applied to frames before running CMC.
@@ -134,6 +138,19 @@ class CMCConfig:
         sof_k:
             (SparseOptFlow only) `k` passed to `cv2.goodFeaturesToTrack`.
             Harris detector free parameter. Ignored if `sof_use_harris` is False.
+
+        ecc_number_of_iterations:
+            (ECC only) Maximum number of optimization iterations used by the ECC
+            alignment procedure.
+
+        ecc_termination_eps:
+            (ECC only) Convergence tolerance used by the ECC optimizer.
+            Smaller values require a more precise fit and may increase runtime.
+
+        ecc_gaussian_filter_size:
+            (ECC only) Gaussian filter size parameter passed to OpenCV's
+            `findTransformECC`. This can help stabilize optimization on noisy frames.
+            A value of 1 matches the current implementation.
     """
     method: CMCTMethod = "sparseOptFlow"
     downscale: int = 2
@@ -160,6 +177,18 @@ class CMCConfig:
     sof_use_harris: bool = False
     sof_k: float = 0.04
 
+    # ECC parameters
+
+    # BoT-SORT's original - resulting in veeery long (=unacceptably long) execution time 
+    # ecc_number_of_iterations: int = 5000
+    # ecc_termination_eps: float = 1e-6
+
+    # Adjusted
+    ecc_number_of_iterations: int = 50
+    ecc_termination_eps: float = 1e-4
+
+    ecc_gaussian_filter_size: int = 1
+
 
 class CMC:
     """
@@ -189,6 +218,7 @@ def __init__(self, cfg: Optional[CMCConfig] = None) -> None:
         Notes:
             - Detector/extractor/matcher are only created if method is "orb" or "sift".
             - feature_paramsare only created if method is "sparseOptFlow".
+            - ECC optimization settings are created for "ecc".
         """
         self.cfg = cfg or CMCConfig()
         self.downscale = max(1, int(self.cfg.downscale))
@@ -222,6 +252,13 @@ def __init__(self, cfg: Optional[CMCConfig] = None) -> None:
                 useHarrisDetector=self.cfg.sof_use_harris,
                 k=self.cfg.sof_k,
             )
+        elif self.cfg.method == "ecc":
+            self.warp_mode = cv2.MOTION_EUCLIDEAN
+            self.criteria = (
+                cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
+                self.cfg.ecc_number_of_iterations,
+                self.cfg.ecc_termination_eps,
+            )
 
         self.reset()
 
@@ -252,9 +289,9 @@ def estimate(self, frame_bgr: np.ndarray,
 
         Args:
             frame_bgr: Current frame in BGR format (uint8), shape (H, W, 3).
-            dets_xyxy: Optional detections (N,4) in xyxy format, in original image 
-                scale. Used only by ORB method for masking out object regions 
-                (background-only features).
+            dets_xyxy: Optional detections (N,4) in xyxy format, in original image
+                scale. Used by feature-based methods (ORB and SIFT) to mask out object 
+                regions during motion estimation.
 
         Returns:
             H: Affine transform matrix of shape (2, 3), dtype float32.
@@ -269,6 +306,9 @@ def estimate(self, frame_bgr: np.ndarray,
         if self.cfg.method == "sparseOptFlow":
             return self._estimate_sparse_optflow(frame_bgr)
 
+        if self.cfg.method == "ecc":
+            return self._estimate_ecc(frame_bgr)
+
         # fallback
         return np.eye(2, 3, dtype=np.float32)
 
@@ -483,6 +523,79 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
         self._prev_points = None if keypoints is None else keypoints.copy()
 
         return H_aff
+    
+
+    def _estimate_ecc(self, frame_bgr: np.ndarray) -> np.ndarray:
+        """
+        ECC-based affine motion estimation.
+
+        This method estimates a global 2D Euclidean transform between the previous
+        frame and the current frame using OpenCV's Enhanced Correlation Coefficient
+        (ECC) image alignment algorithm.
+
+        Steps:
+            1) Convert the current frame to grayscale.
+            2) Optionally smooth and downscale the frame.
+            3) If this is the first frame, store it and return identity.
+            4) Optimize a 2x3 warp matrix aligning the previous frame to the current 
+               frame.
+            5) If optimization succeeds, return the estimated transform.
+               Otherwise, keep the identity transform.
+            6) Store the current frame for the next call.
+
+        Args:
+            frame_bgr:
+                Current frame in BGR format.
+
+        Returns:
+            H:
+                Affine transform matrix of shape (2, 3), dtype float32, mapping
+                previous-frame coordinates to current-frame coordinates. Returns 
+                identity if initialization has not yet occurred or if ECC optimization 
+                fails.
+        """
+        H_img, W_img = frame_bgr.shape[:2]
+        frame = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
+
+        H_aff = np.eye(2, 3, dtype=np.float32)
+
+        if self.downscale > 1:
+            frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
+            frame = cv2.resize(frame, (W_img // self.downscale, H_img // self.downscale))
+
+        if not self._initialized:
+            self._prev_frame_gray = frame.copy()
+            self._initialized = True
+            return H_aff
+
+        if self._prev_frame_gray is None:
+            self._prev_frame_gray = frame.copy()
+            return H_aff
+
+        try:
+            _cc, H_est = cv2.findTransformECC(
+                self._prev_frame_gray,
+                frame,
+                H_aff,
+                self.warp_mode,
+                self.criteria,
+                None,
+                self.cfg.ecc_gaussian_filter_size,
+            )
+            if H_est is not None:
+                H_aff = H_est.astype(np.float32)
+        except cv2.error as e:
+            print('Warning: find transform failed. Set warp as identity')
+            pass
+
+        # NOTE: this line is not included in the original BoT-SORT. However,
+        # in a working recurrent estimator, you do need to update the previous frame 
+        # after each call. Otherwise the next call would keep aligning against an old 
+        # frame.
+        self._prev_frame_gray = frame.copy()
+        
+        return H_aff
+
 
     @staticmethod
     def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
diff --git a/trackers/core/botsort/tracker.py b/trackers/core/botsort/tracker.py
index fa2db77a..b7bb99dc 100644
--- a/trackers/core/botsort/tracker.py
+++ b/trackers/core/botsort/tracker.py
@@ -82,8 +82,8 @@ def __init__(
                 - low confidence:  confidence < threshold
             enable_cmc: Whether to enable camera motion compensation (CMC).
             cmc_method: CMC method string passed into `CMCConfig(method=...)`. 
-                Supported values depend on `CMC` (e.g. "orb", "sift", "sparseOptFlow"). 
-                See CMCConfig.
+                Supported values depend on `CMC` (e.g. "orb", "sift", "sparseOptFlow", 
+                "ecc"). See CMCConfig.
             cmc_downscale: Downscale factor used inside CMC for speed/robustness.
 
         Notes:

From 4a8e2762f855c1f7adfb8e83a61413675c93095d Mon Sep 17 00:00:00 2001
From: Tomasz Stanczyk <stanior666@gmail.com>
Date: Sun, 22 Mar 2026 12:01:38 +0100
Subject: [PATCH 08/14] Change min_iou_match thresh value. Introduce separate
 thresh for second assoc step

---
 trackers/core/botsort/tracker.py | 78 +++++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 26 deletions(-)

diff --git a/trackers/core/botsort/tracker.py b/trackers/core/botsort/tracker.py
index b7bb99dc..8b09b953 100644
--- a/trackers/core/botsort/tracker.py
+++ b/trackers/core/botsort/tracker.py
@@ -42,7 +42,10 @@ class BoTSORTTracker(BaseTracker):
             unmatched before being removed.
         minimum_consecutive_frames: Track maturity threshold before assigning a 
             permanent ID.
-        minimum_iou_threshold: Minimum IoU required for a valid match.
+        minimum_iou_threshold_first_assoc: Minimum IoU required for a valid match
+            in the first association step
+        minimum_iou_threshold_second_assoc: Minimum IoU required for a valid match
+            in the second association step
         track_activation_threshold: Confidence threshold for spawning a new track.
         high_conf_det_threshold: Confidence threshold splitting detections into 
             high/low groups.
@@ -57,7 +60,8 @@ def __init__(
         frame_rate: float = 30.0,
         track_activation_threshold: float = 0.7,
         minimum_consecutive_frames: int = 2,
-        minimum_iou_threshold: float = 0.1,
+        minimum_iou_threshold_first_assoc: float = 0.2,
+        minimum_iou_threshold_second_assoc: float = 0.5,
         high_conf_det_threshold: float = 0.6,
         enable_cmc: bool = True,
         cmc_method: str = "sparseOptFlow",
@@ -76,7 +80,10 @@ def __init__(
                 track.
             minimum_consecutive_frames: Number of successful updates required before 
                 assigning a stable track ID (different than initial -1).
-            minimum_iou_threshold: Minimum IoU to accept a detection-track association.
+            minimum_iou_threshold_first_assoc: Minimum IoU to accept a detection-track 
+                association during the first association step.
+            minimum_iou_threshold_second_assoc: Minimum IoU to accept a detection-track 
+                association during the second association step.
             high_conf_det_threshold: Confidence threshold used to split detections into:
                 - high confidence: confidence >= threshold
                 - low confidence:  confidence < threshold
@@ -96,7 +103,8 @@ def __init__(
         # consistent time-based tracking across different frame rates.
         self.maximum_frames_without_update = int(frame_rate / 30.0 * lost_track_buffer)
         self.minimum_consecutive_frames = minimum_consecutive_frames
-        self.minimum_iou_threshold = minimum_iou_threshold
+        self.minimum_iou_threshold_first_assoc = minimum_iou_threshold_first_assoc
+        self.minimum_iou_threshold_second_assoc = minimum_iou_threshold_second_assoc
         self.track_activation_threshold = track_activation_threshold
         self.high_conf_det_threshold = high_conf_det_threshold
         self.tracks: list[BoTSORTKalmanBoxTracker] = []
@@ -211,6 +219,7 @@ def update(
             self._similarity_step(
                 high_prob_detections,
                 self.tracks,
+                self.minimum_iou_threshold_first_assoc
             )
         )
 
@@ -226,7 +235,9 @@ def update(
 
         # Step 2: associate Low Probability detections with remaining tracks
         matched_indices, unmatched_tracks, unmatched_detections = self._similarity_step(
-            low_prob_detections, remaining_tracks
+            low_prob_detections, 
+            remaining_tracks, 
+            self.minimum_iou_threshold_second_assoc
         )
 
         # Update matched tracks with low-confidence detections
@@ -263,33 +274,47 @@ def update(
         if len(final_updated_detections) == 0:
             final_updated_detections.tracker_id = np.array([], dtype=int)
         return final_updated_detections
-
+    
     def _get_high_and_low_probability_detections(
-        self, detections: sv.Detections
-    ) -> tuple[sv.Detections, sv.Detections]:
+            self, detections: sv.Detections
+        ) -> tuple[sv.Detections, sv.Detections]:
         """
-        Splits the input detections into high-confidence and low-confidence sets
-        based on the `self.high_conf_det_threshold`.
+        Split detections into high-confidence and low-confidence sets.
+
+        Detections with confidence <= 0.1 are discarded completely and are not
+        used by the tracker.
+
+        Rules:
+            high-confidence:
+                confidence >= self.high_conf_det_threshold
+
+            low-confidence:
+                0.1 < confidence < self.high_conf_det_threshold
+
+            discarded:
+                confidence <= 0.1
 
         Args:
-            detections: The input detections with confidence scores.
+            detections:
+                Input detections containing confidence scores.
 
         Returns:
-            A tuple containing two `sv.Detections objects`: the first for
-                high-confidence detections `(confidence >= threshold)` and the second
-                for low-confidence detections `(confidence < threshold)`.
+            Tuple:
+                (high_confidence_detections, low_confidence_detections)
         """
-        # Check if confidence scores exist before comparing
-        if detections.confidence is not None:
-            # Perform element-wise comparison if confidence is a NumPy array
-            condition = detections.confidence >= self.high_conf_det_threshold
-        else:
-            # If no confidence scores, no detections meet the threshold
-            # Create a boolean array of False with the same length as detections
-            condition = np.zeros(len(detections), dtype=bool)
-
-        high_confidence = detections[condition]
-        low_confidence = detections[np.logical_not(condition)]
+
+        if detections.confidence is None:
+            # If no confidence information exists, treat all detections as high-confidence
+            return detections, detections[:0]
+
+        conf = detections.confidence
+
+        high_mask = conf >= self.high_conf_det_threshold
+        low_mask = (conf > 0.1) & (conf < self.high_conf_det_threshold)
+
+        high_confidence = detections[high_mask]
+        low_confidence = detections[low_mask]
+
         return high_confidence, low_confidence
 
     def _get_associated_indices(
@@ -375,6 +400,7 @@ def _similarity_step(
         self,
         detections: sv.Detections,
         tracks: list[BoTSORTKalmanBoxTracker],
+        thresh: float
     ) -> tuple[list[tuple[int, int]], set[int], set[int]]:
         """Measures similarity based on IoU between tracks and detections and returns 
             the matches and unmatched tracks/detections. Is used for step 1 and 2 of the
@@ -383,6 +409,7 @@ def _similarity_step(
         Args:
             detections: The set of object detections.
             tracks: The list of tracks that will be matched to the detections.
+            thresh: Minimum IoU required for a valid match.
 
         Returns:
             A tuple containing:
@@ -394,7 +421,6 @@ def _similarity_step(
         """  # noqa: E501
         # Build IoU cost matrix between detections and predicted bounding boxes
         similarity_matrix = get_iou_matrix(tracks, detections.xyxy)
-        thresh = self.minimum_iou_threshold
 
         # Associate detections to tracks based on the higher value of the
         # similarity matrix, using the Jonker-Volgenant algorithm 

From 6db9eded520032161759aeb4f605a6e58082bb96 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 22 Mar 2026 11:03:18 +0000
Subject: [PATCH 09/14] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto?=
 =?UTF-8?q?=20format=20pre-commit=20hooks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 trackers/core/botsort/__init__.py           |   2 +-
 trackers/core/botsort/cmc.py                | 169 +++++++++++---------
 trackers/core/botsort/kalman_box_tracker.py | 115 +++++++------
 trackers/core/botsort/tracker.py            |  93 ++++++-----
 trackers/core/botsort/utils.py              |   4 +-
 5 files changed, 207 insertions(+), 176 deletions(-)

diff --git a/trackers/core/botsort/__init__.py b/trackers/core/botsort/__init__.py
index e0bc8c7c..8bae3857 100644
--- a/trackers/core/botsort/__init__.py
+++ b/trackers/core/botsort/__init__.py
@@ -5,4 +5,4 @@
 # ------------------------------------------------------------------------
 from .tracker import BoTSORTTracker
 
-__all__ = ["BoTSORTTracker"]
\ No newline at end of file
+__all__ = ["BoTSORTTracker"]
diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
index e480226b..8deb976b 100644
--- a/trackers/core/botsort/cmc.py
+++ b/trackers/core/botsort/cmc.py
@@ -4,41 +4,42 @@
 # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 # ------------------------------------------------------------------------
 
+import copy
 from dataclasses import dataclass
-from typing import Optional, Literal
+from typing import Literal
 
-import copy
-import numpy as np
 import cv2
+import numpy as np
 
 CMCTMethod = Literal["orb", "sift", "sparseOptFlow", "ecc"]
 
+
 @dataclass
 class CMCConfig:
     """
     Configuration for camera motion compensation (CMC).
 
-    The CMC module estimates a global 2D affine transform `H` (2x3) between consecutive 
-    frames. This transform is then applied to predicted track states before data 
+    The CMC module estimates a global 2D affine transform `H` (2x3) between consecutive
+    frames. This transform is then applied to predicted track states before data
     association.
 
     Attributes:
         method:
             Camera motion estimation method.
 
-            - "orb": Feature matching using 
+            - "orb": Feature matching using
               FAST keypoints + ORB descriptors + BFMatcher (Hamming),
               followed by robust affine estimation (RANSAC).
               Optionally masks out detection boxes so features are extracted from
               background.
-            - "sift": Feature matching using 
-              SIFT keypoints + SIFT descriptors + BFMatcher (L2), 
-              followed by robust affine estimation (RANSAC). 
+            - "sift": Feature matching using
+              SIFT keypoints + SIFT descriptors + BFMatcher (L2),
+              followed by robust affine estimation (RANSAC).
               Optionally masks out detection boxes so features are extracted from
-              background. "sift" generally produces fewer but more distinctive matches 
+              background. "sift" generally produces fewer but more distinctive matches
               than ORB at higher compute cost.
             - "sparseOptFlow": Sparse optical flow using corner tracking:
-              goodFeaturesToTrack -> calcOpticalFlowPyrLK -> robust affine estimation 
+              goodFeaturesToTrack -> calcOpticalFlowPyrLK -> robust affine estimation
               (RANSAC).
             - "ecc": Global image alignment using the Enhanced Correlation Coefficient
               (ECC) optimization method. This estimates a 2D Euclidean transform
@@ -54,23 +55,23 @@ class CMCConfig:
             Behavior:
             - Frames are resized to (W//downscale, H//downscale) for motion estimation.
             - The resulting affine translation components H[0,2], H[1,2] are scaled back
-              by multiplying by `downscale`, so the transform is in original image 
+              by multiplying by `downscale`, so the transform is in original image
               coordinates.
 
         fast_threshold:
             (ORB only) Threshold for the FAST keypoint detector.
-            Higher values yield fewer keypoints (more selective); lower values yield 
+            Higher values yield fewer keypoints (more selective); lower values yield
             more keypoints.
 
         ransac_reproj_threshold:
             (ORB only) RANSAC reprojection threshold in pixels passed to
-            OpenCV's affine estimation. It controls how far a point is allowed to 
+            OpenCV's affine estimation. It controls how far a point is allowed to
             deviate from the estimated model while still being counted as an inlier.
-            Smaller values are stricter (reject more matches); larger values are more 
+            Smaller values are stricter (reject more matches); larger values are more
             tolerant.
 
         max_spatial_distance_frac:
-            (ORB only) Maximum allowed spatial displacement for a tentative match, 
+            (ORB only) Maximum allowed spatial displacement for a tentative match,
             expressed as a fraction of (image width, image height) *after downscale*.
 
             Example:
@@ -78,37 +79,37 @@ class CMCConfig:
                 then a match is rejected if |dx| >= 0.25*W or |dy| >= 0.25*H.
 
             Motivation:
-                Reject obviously incorrect descriptor matches whose displacement is 
+                Reject obviously incorrect descriptor matches whose displacement is
                 implausibly large.
 
         roi_min_frac:
-            (ORB only) Lower bound of the region-of-interest (ROI) used to select 
-            keypoints, expressed as a fraction of frame size. Points outside the ROI 
+            (ORB only) Lower bound of the region-of-interest (ROI) used to select
+            keypoints, expressed as a fraction of frame size. Points outside the ROI
             are masked out.
 
             Example:
                 roi_min_frac=0.02 means we ignore a ~2% border on each side.
 
         roi_max_frac:
-            (ORB only) Upper bound of the ROI used to select keypoints (fraction of 
+            (ORB only) Upper bound of the ROI used to select keypoints (fraction of
             frame size). Together with roi_min_frac, it defines a central rectangle:
                 [roi_min_frac..roi_max_frac] in both x and y.
 
-        sift_n_octave_layers: 
-            (SIFT only) Number of octave layers used by SIFT when constructing the 
-            scale-space pyramid. Increasing this can increase sensitivity to scale 
+        sift_n_octave_layers:
+            (SIFT only) Number of octave layers used by SIFT when constructing the
+            scale-space pyramid. Increasing this can increase sensitivity to scale
             changes, at higher compute cost.
 
-        sift_contrast_threshold: 
-            (SIFT only) Threshold controlling how sensitive SIFT is 
-            to low-contrast keypoints. Lower values generally produce more keypoints; 
+        sift_contrast_threshold:
+            (SIFT only) Threshold controlling how sensitive SIFT is
+            to low-contrast keypoints. Lower values generally produce more keypoints;
             higher values are stricter.
 
-        sift_edge_threshold: 
-            (SIFT only) Threshold controlling rejection of keypoints on edges. 
-            Lower values reject more edge-like responses; higher values are more 
+        sift_edge_threshold:
+            (SIFT only) Threshold controlling rejection of keypoints on edges.
+            Lower values reject more edge-like responses; higher values are more
             permissive.
-            
+
         sof_max_corners:
             (SparseOptFlow only) `maxCorners` passed to `cv2.goodFeaturesToTrack`.
             Maximum number of corners to detect for tracking.
@@ -116,23 +117,23 @@ class CMCConfig:
 
         sof_quality_level:
             (SparseOptFlow only) `qualityLevel` passed to `cv2.goodFeaturesToTrack`.
-            Minimum accepted quality of corners. A higher value keeps only stronger 
+            Minimum accepted quality of corners. A higher value keeps only stronger
             corners; a lower value yields more corners (including weaker ones).
 
         sof_min_distance:
             (SparseOptFlow only) `minDistance` passed to `cv2.goodFeaturesToTrack`.
             Minimum Euclidean distance (in pixels) between returned corners.
-            Higher values produce more spatially spread points; lower values allow 
+            Higher values produce more spatially spread points; lower values allow
             clustering.
 
         sof_block_size:
             (SparseOptFlow only) `blockSize` passed to `cv2.goodFeaturesToTrack`.
-            Size of the neighborhood used to compute corner quality (structure tensor 
+            Size of the neighborhood used to compute corner quality (structure tensor
             window).
 
         sof_use_harris:
-            (SparseOptFlow only) `useHarrisDetector` passed to 
-            `cv2.goodFeaturesToTrack`. If True, uses the Harris corner measure; 
+            (SparseOptFlow only) `useHarrisDetector` passed to
+            `cv2.goodFeaturesToTrack`. If True, uses the Harris corner measure;
             if False, uses the Shi-Tomasi measure.
 
         sof_k:
@@ -152,6 +153,7 @@ class CMCConfig:
             `findTransformECC`. This can help stabilize optimization on noisy frames.
             A value of 1 matches the current implementation.
     """
+
     method: CMCTMethod = "sparseOptFlow"
     downscale: int = 2
 
@@ -179,7 +181,7 @@ class CMCConfig:
 
     # ECC parameters
 
-    # BoT-SORT's original - resulting in veeery long (=unacceptably long) execution time 
+    # BoT-SORT's original - resulting in veeery long (=unacceptably long) execution time
     # ecc_number_of_iterations: int = 5000
     # ecc_termination_eps: float = 1e-6
 
@@ -204,11 +206,11 @@ class CMC:
 
     Notes:
         - H maps points from previous frame coordinates to current frame coordinates.
-        - This class does not perform any drawing/visualization; it only estimates 
+        - This class does not perform any drawing/visualization; it only estimates
         transforms.
     """
 
-    def __init__(self, cfg: Optional[CMCConfig] = None) -> None:
+    def __init__(self, cfg: CMCConfig | None = None) -> None:
         """
         Initialize CMC.
 
@@ -274,23 +276,24 @@ def reset(self) -> None:
 
         # ORB state
         self._prev_kps = None
-        self._prev_desc: Optional[np.ndarray] = None
+        self._prev_desc: np.ndarray | None = None
 
         # SparseOptFlow state
-        self._prev_frame_gray: Optional[np.ndarray] = None
+        self._prev_frame_gray: np.ndarray | None = None
 
         # shape (N,1,2) from goodFeaturesToTrack
-        self._prev_points: Optional[np.ndarray] = None  
+        self._prev_points: np.ndarray | None = None
 
-    def estimate(self, frame_bgr: np.ndarray, 
-                 dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
+    def estimate(
+        self, frame_bgr: np.ndarray, dets_xyxy: np.ndarray | None = None
+    ) -> np.ndarray:
         """
         Estimate global affine transform H (2x3) from previous frame to current frame.
 
         Args:
             frame_bgr: Current frame in BGR format (uint8), shape (H, W, 3).
             dets_xyxy: Optional detections (N,4) in xyxy format, in original image
-                scale. Used by feature-based methods (ORB and SIFT) to mask out object 
+                scale. Used by feature-based methods (ORB and SIFT) to mask out object
                 regions during motion estimation.
 
         Returns:
@@ -312,10 +315,11 @@ def estimate(self, frame_bgr: np.ndarray,
         # fallback
         return np.eye(2, 3, dtype=np.float32)
 
-    def _estimate_feature_affine(self, frame_bgr: np.ndarray, 
-                      dets_xyxy: Optional[np.ndarray] = None) -> np.ndarray:
+    def _estimate_feature_affine(
+        self, frame_bgr: np.ndarray, dets_xyxy: np.ndarray | None = None
+    ) -> np.ndarray:
         """
-        Feature affine estimation. ORB-based or SIFT-based 
+        Feature affine estimation. ORB-based or SIFT-based
         (different initializations of self.detector, self.extractor and self.matcher for
         ORB and SIFT)
 
@@ -388,8 +392,9 @@ def _estimate_feature_affine(self, frame_bgr: np.ndarray,
             self._prev_desc = copy.copy(desc)
             return H_aff
 
-        max_spatial = self.cfg.max_spatial_distance_frac * np.array([W, H], 
-                                                                    dtype=np.float32)
+        max_spatial = self.cfg.max_spatial_distance_frac * np.array(
+            [W, H], dtype=np.float32
+        )
 
         prev_pts = []
         curr_pts = []
@@ -461,8 +466,9 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
 
         # Downscale
         if self.downscale > 1:
-            frame = cv2.resize(frame, (W_img // self.downscale, 
-                                       H_img // self.downscale))
+            frame = cv2.resize(
+                frame, (W_img // self.downscale, H_img // self.downscale)
+            )
 
         # Find keypoints in current frame
         keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
@@ -475,15 +481,20 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
             return H_aff
 
         # If we don't have points, re-init
-        if self._prev_frame_gray is None or self._prev_points is None or keypoints is None:
+        if (
+            self._prev_frame_gray is None
+            or self._prev_points is None
+            or keypoints is None
+        ):
             self._prev_frame_gray = frame.copy()
             self._prev_points = copy.copy(keypoints)
             return H_aff
 
         # Optical flow correspondences
         # calcOpticalFlowPyrLK will throw or return nonsense if we give it None
-        matched, status, _err = cv2.calcOpticalFlowPyrLK(self._prev_frame_gray, frame, 
-                                                         self._prev_points, None)
+        matched, status, _err = cv2.calcOpticalFlowPyrLK(
+            self._prev_frame_gray, frame, self._prev_points, None
+        )
 
         if status is None or matched is None:
             self._prev_frame_gray = frame.copy()
@@ -505,7 +516,9 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
         curr_pts = np.array(curr_pts)
 
         # Find rigid matrix
-        if (np.size(prev_pts, 0) > 4) and (np.size(prev_pts, 0) == np.size(curr_pts, 0)):
+        if (np.size(prev_pts, 0) > 4) and (
+            np.size(prev_pts, 0) == np.size(curr_pts, 0)
+        ):
             H_est, _ = cv2.estimateAffinePartial2D(prev_pts, curr_pts, cv2.RANSAC)
             if H_est is not None:
                 H_aff = H_est.astype(np.float32)
@@ -515,7 +528,7 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
                     H_aff[0, 2] *= self.downscale
                     H_aff[1, 2] *= self.downscale
         else:
-            print('Warning: not enough matching points')
+            print("Warning: not enough matching points")
 
         # Store to next iteration
         self._prev_frame_gray = frame.copy()
@@ -523,7 +536,6 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
         self._prev_points = None if keypoints is None else keypoints.copy()
 
         return H_aff
-    
 
     def _estimate_ecc(self, frame_bgr: np.ndarray) -> np.ndarray:
         """
@@ -537,7 +549,7 @@ def _estimate_ecc(self, frame_bgr: np.ndarray) -> np.ndarray:
             1) Convert the current frame to grayscale.
             2) Optionally smooth and downscale the frame.
             3) If this is the first frame, store it and return identity.
-            4) Optimize a 2x3 warp matrix aligning the previous frame to the current 
+            4) Optimize a 2x3 warp matrix aligning the previous frame to the current
                frame.
             5) If optimization succeeds, return the estimated transform.
                Otherwise, keep the identity transform.
@@ -550,8 +562,8 @@ def _estimate_ecc(self, frame_bgr: np.ndarray) -> np.ndarray:
         Returns:
             H:
                 Affine transform matrix of shape (2, 3), dtype float32, mapping
-                previous-frame coordinates to current-frame coordinates. Returns 
-                identity if initialization has not yet occurred or if ECC optimization 
+                previous-frame coordinates to current-frame coordinates. Returns
+                identity if initialization has not yet occurred or if ECC optimization
                 fails.
         """
         H_img, W_img = frame_bgr.shape[:2]
@@ -561,7 +573,9 @@ def _estimate_ecc(self, frame_bgr: np.ndarray) -> np.ndarray:
 
         if self.downscale > 1:
             frame = cv2.GaussianBlur(frame, (3, 3), 1.5)
-            frame = cv2.resize(frame, (W_img // self.downscale, H_img // self.downscale))
+            frame = cv2.resize(
+                frame, (W_img // self.downscale, H_img // self.downscale)
+            )
 
         if not self._initialized:
             self._prev_frame_gray = frame.copy()
@@ -584,23 +598,22 @@ def _estimate_ecc(self, frame_bgr: np.ndarray) -> np.ndarray:
             )
             if H_est is not None:
                 H_aff = H_est.astype(np.float32)
-        except cv2.error as e:
-            print('Warning: find transform failed. Set warp as identity')
+        except cv2.error:
+            print("Warning: find transform failed. Set warp as identity")
             pass
 
         # NOTE: this line is not included in the original BoT-SORT. However,
-        # in a working recurrent estimator, you do need to update the previous frame 
-        # after each call. Otherwise the next call would keep aligning against an old 
+        # in a working recurrent estimator, you do need to update the previous frame
+        # after each call. Otherwise the next call would keep aligning against an old
         # frame.
         self._prev_frame_gray = frame.copy()
-        
-        return H_aff
 
+        return H_aff
 
     @staticmethod
     def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
         """
-        Apply a global affine motion transform to tracker states and covariances 
+        Apply a global affine motion transform to tracker states and covariances
         in-place.
 
         This method updates each track according to the affine transform
@@ -609,7 +622,7 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
 
         where:
             R:
-                2x2 linear part of the affine transform (rotation / shear / scale-like 
+                2x2 linear part of the affine transform (rotation / shear / scale-like
                 part).
             t:
                 2D translation vector.
@@ -636,7 +649,7 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
                     Velocities of the width and height.
 
         State update logic:
-            The affine transform is applied only to the geometric quantities that live 
+            The affine transform is applied only to the geometric quantities that live
             in the 2D image plane as position or velocity vectors:
 
             1) Center position:
@@ -651,13 +664,13 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
         Why width and height are not transformed here:
             Width and height are scalar box dimensions, not 2D point coordinates.
             In this implementation, camera motion compensation is used to correct the
-            object center location and its image-plane velocity, while the box size 
-            terms are left unchanged. This keeps the compensation simple and consistent 
+            object center location and its image-plane velocity, while the box size
+            terms are left unchanged. This keeps the compensation simple and consistent
             with the state representation used by the tracker.
 
         Covariance update:
             Each track also stores a covariance matrix `P` describing uncertainty in the
-            8D Kalman state. After the mean state is transformed, the covariance is 
+            8D Kalman state. After the mean state is transformed, the covariance is
             updated using the linear transform
 
                 P = A @ P @ A.T
@@ -688,10 +701,10 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
 
         Notes:
             - If `H` is None or `tracks` is empty, this method does nothing.
-            - The method assumes that `H` has already been estimated in image 
+            - The method assumes that `H` has already been estimated in image
               coordinates onsistent with the tracker state.
             - This method does not perform any validity checks on whether the estimated
-              transform is physically plausible; it simply applies the provided 
+              transform is physically plausible; it simply applies the provided
               transform.
         """
         if H is None or len(tracks) == 0:
@@ -715,8 +728,8 @@ def apply_to_tracks(tracks: list, H: np.ndarray) -> None:
 
             # Update the state covariance under the corresponding linear transform.
             A = np.eye(8, dtype=np.float32)
-            A[0:2, 0:2] = R      # center position
-            A[4:6, 4:6] = R      # center velocity
+            A[0:2, 0:2] = R  # center position
+            A[4:6, 4:6] = R  # center velocity
             # Box size terms (w, h, vw, vh) are not transformed in this implementation.
 
-            trk.P = (A @ trk.P @ A.T).astype(np.float32)
\ No newline at end of file
+            trk.P = (A @ trk.P @ A.T).astype(np.float32)
diff --git a/trackers/core/botsort/kalman_box_tracker.py b/trackers/core/botsort/kalman_box_tracker.py
index 91b1a409..c5d53769 100644
--- a/trackers/core/botsort/kalman_box_tracker.py
+++ b/trackers/core/botsort/kalman_box_tracker.py
@@ -162,7 +162,7 @@ def _initialize_kalman_filter(self, measurement: np.ndarray) -> None:
         This method initializes the following matrices:
 
         State transition matrix:
-            F is an 8x8 matrix defining how the state evolves from one frame to the 
+            F is an 8x8 matrix defining how the state evolves from one frame to the
             next. It implements a constant-velocity model:
                 xc <- xc + vxc
                 yc <- yc + vyc
@@ -209,49 +209,58 @@ def _initialize_kalman_filter(self, measurement: np.ndarray) -> None:
 
         self.H = np.eye(4, 8, dtype=np.float32)
 
-        # BoT-SORT-style scale-aware noise using width/height. 
+        # BoT-SORT-style scale-aware noise using width/height.
         sigma_p = 0.05
         sigma_v = 0.00625
         sigma_m = 0.05
 
         w, h = measurement[2], measurement[3]
 
-        q_diag = np.array([
-            (sigma_p * w) ** 2,
-            (sigma_p * h) ** 2,
-            (sigma_p * w) ** 2,
-            (sigma_p * h) ** 2,
-            (sigma_v * w) ** 2,
-            (sigma_v * h) ** 2,
-            (sigma_v * w) ** 2,
-            (sigma_v * h) ** 2,
-        ], dtype=np.float32)
+        q_diag = np.array(
+            [
+                (sigma_p * w) ** 2,
+                (sigma_p * h) ** 2,
+                (sigma_p * w) ** 2,
+                (sigma_p * h) ** 2,
+                (sigma_v * w) ** 2,
+                (sigma_v * h) ** 2,
+                (sigma_v * w) ** 2,
+                (sigma_v * h) ** 2,
+            ],
+            dtype=np.float32,
+        )
         self.Q = np.diag(q_diag)
 
-        r_diag = np.array([
-            (sigma_m * w) ** 2,
-            (sigma_m * h) ** 2,
-            (sigma_m * w) ** 2,
-            (sigma_m * h) ** 2,
-        ], dtype=np.float32)
+        r_diag = np.array(
+            [
+                (sigma_m * w) ** 2,
+                (sigma_m * h) ** 2,
+                (sigma_m * w) ** 2,
+                (sigma_m * h) ** 2,
+            ],
+            dtype=np.float32,
+        )
         self.R = np.diag(r_diag)
 
         # Initial covariance, as in original BoT-SORT KF
-        p_diag = np.array([
-            (2 * sigma_p * w) ** 2,
-            (2 * sigma_p * h) ** 2,
-            (2 * sigma_p * w) ** 2,
-            (2 * sigma_p * h) ** 2,
-            (10 * sigma_v * w) ** 2,
-            (10 * sigma_v * h) ** 2,
-            (10 * sigma_v * w) ** 2,
-            (10 * sigma_v * h) ** 2,
-        ], dtype=np.float32)
+        p_diag = np.array(
+            [
+                (2 * sigma_p * w) ** 2,
+                (2 * sigma_p * h) ** 2,
+                (2 * sigma_p * w) ** 2,
+                (2 * sigma_p * h) ** 2,
+                (10 * sigma_v * w) ** 2,
+                (10 * sigma_v * h) ** 2,
+                (10 * sigma_v * w) ** 2,
+                (10 * sigma_v * h) ** 2,
+            ],
+            dtype=np.float32,
+        )
         self.P = np.diag(p_diag)
 
     def _update_process_and_measurement_noise(self) -> None:
         """
-        Recompute the process and measurement noise covariances from the current box 
+        Recompute the process and measurement noise covariances from the current box
         size.
 
         This method updates:
@@ -267,13 +276,13 @@ def _update_process_and_measurement_noise(self) -> None:
         Why this update is needed:
             The scale of the uncertainty should depend on the current object size.
             For example, a 2-pixel error is relatively more important for a small object
-            than for a large one. Therefore, the diagonal entries of Q and R are 
+            than for a large one. Therefore, the diagonal entries of Q and R are
             computed from the current predicted width and height stored in the state.
 
         Implementation details:
             - Width and height are read from the current state:
                   w = state[2], h = state[3]
-            - They are clamped to a small positive minimum to avoid zero or negative 
+            - They are clamped to a small positive minimum to avoid zero or negative
               values.
             - The resulting Q and R matrices remain diagonal.
 
@@ -288,24 +297,30 @@ def _update_process_and_measurement_noise(self) -> None:
         w = max(float(self.state[2, 0]), 1e-3)
         h = max(float(self.state[3, 0]), 1e-3)
 
-        q_diag = np.array([
-            (sigma_p * w) ** 2,
-            (sigma_p * h) ** 2,
-            (sigma_p * w) ** 2,
-            (sigma_p * h) ** 2,
-            (sigma_v * w) ** 2,
-            (sigma_v * h) ** 2,
-            (sigma_v * w) ** 2,
-            (sigma_v * h) ** 2,
-        ], dtype=np.float32)
+        q_diag = np.array(
+            [
+                (sigma_p * w) ** 2,
+                (sigma_p * h) ** 2,
+                (sigma_p * w) ** 2,
+                (sigma_p * h) ** 2,
+                (sigma_v * w) ** 2,
+                (sigma_v * h) ** 2,
+                (sigma_v * w) ** 2,
+                (sigma_v * h) ** 2,
+            ],
+            dtype=np.float32,
+        )
         self.Q = np.diag(q_diag)
 
-        r_diag = np.array([
-            (sigma_m * w) ** 2,
-            (sigma_m * h) ** 2,
-            (sigma_m * w) ** 2,
-            (sigma_m * h) ** 2,
-        ], dtype=np.float32)
+        r_diag = np.array(
+            [
+                (sigma_m * w) ** 2,
+                (sigma_m * h) ** 2,
+                (sigma_m * w) ** 2,
+                (sigma_m * h) ** 2,
+            ],
+            dtype=np.float32,
+        )
         self.R = np.diag(r_diag)
 
     def predict(self) -> None:
@@ -326,9 +341,9 @@ def predict(self) -> None:
                 Process noise covariance.
 
         Effect of the prediction:
-            - The center position and box size are advanced using their current 
+            - The center position and box size are advanced using their current
               velocities.
-            - The covariance matrix P is propagated forward and increased by Q to 
+            - The covariance matrix P is propagated forward and increased by Q to
               reflect additional uncertainty introduced during motion prediction.
 
         Additional behavior:
@@ -431,4 +446,4 @@ def get_state_bbox(self) -> np.ndarray:
         """
         Return current predicted box in xyxy format.
         """
-        return self.xywh_to_xyxy(self.state[0:4, 0])
\ No newline at end of file
+        return self.xywh_to_xyxy(self.state[0:4, 0])
diff --git a/trackers/core/botsort/tracker.py b/trackers/core/botsort/tracker.py
index 8b09b953..f4d35842 100644
--- a/trackers/core/botsort/tracker.py
+++ b/trackers/core/botsort/tracker.py
@@ -12,18 +12,19 @@
 from scipy.optimize import linear_sum_assignment
 
 from trackers.core.base import BaseTracker
+from trackers.core.botsort.cmc import CMC, CMCConfig
 from trackers.core.botsort.kalman_box_tracker import BoTSORTKalmanBoxTracker
 from trackers.core.botsort.utils import (
     get_alive_trackers,
     get_iou_matrix,
 )
-from trackers.core.botsort.cmc import CMC, CMCConfig
+
 
 class BoTSORTTracker(BaseTracker):
     """
     BoT-SORT-style multi-object tracker (IoU association + optional CMC).
 
-    The tracker maintains a list of active tracks (Kalman-filter-based) and, for each 
+    The tracker maintains a list of active tracks (Kalman-filter-based) and, for each
     frame, performs:
       1) Predict existing track states (Kalman predict)
       2) Split detections into high/low confidence groups
@@ -33,23 +34,23 @@ class BoTSORTTracker(BaseTracker):
       6) Spawn new tracks from unmatched high-confidence detections
       7) Remove tracks that have been lost for too long
 
-    Parameters in __init__ control thresholds and lifecycle logic similarly to 
+    Parameters in __init__ control thresholds and lifecycle logic similarly to
     ByteTrack.
 
     Attributes:
         tracks: List of active `BoTSORTKalmanBoxTracker` objects.
-        maximum_frames_without_update: Max number of consecutive frames a track can go 
+        maximum_frames_without_update: Max number of consecutive frames a track can go
             unmatched before being removed.
-        minimum_consecutive_frames: Track maturity threshold before assigning a 
+        minimum_consecutive_frames: Track maturity threshold before assigning a
             permanent ID.
         minimum_iou_threshold_first_assoc: Minimum IoU required for a valid match
             in the first association step
         minimum_iou_threshold_second_assoc: Minimum IoU required for a valid match
             in the second association step
         track_activation_threshold: Confidence threshold for spawning a new track.
-        high_conf_det_threshold: Confidence threshold splitting detections into 
+        high_conf_det_threshold: Confidence threshold splitting detections into
             high/low groups.
-        enable_cmc: Whether to run camera motion compensation each frame 
+        enable_cmc: Whether to run camera motion compensation each frame
             (if `cmc` is set).
         cmc: Camera motion compensation instance (or None if disabled).
     """
@@ -66,7 +67,6 @@ def __init__(
         enable_cmc: bool = True,
         cmc_method: str = "sparseOptFlow",
         cmc_downscale: int = 2,
-
     ) -> None:
         """
         Initialize the tracker.
@@ -74,22 +74,22 @@ def __init__(
         Args:
             lost_track_buffer: Time buffer (in frames at 30 FPS) for keeping lost tracks
                 alive before deletion. This is scaled by `frame_rate`.
-            frame_rate: Video frame rate used to scale the lost track buffer to 
+            frame_rate: Video frame rate used to scale the lost track buffer to
                 time-like behavior.
-            track_activation_threshold: Minimum detection confidence to spawn a new 
+            track_activation_threshold: Minimum detection confidence to spawn a new
                 track.
-            minimum_consecutive_frames: Number of successful updates required before 
+            minimum_consecutive_frames: Number of successful updates required before
                 assigning a stable track ID (different than initial -1).
-            minimum_iou_threshold_first_assoc: Minimum IoU to accept a detection-track 
+            minimum_iou_threshold_first_assoc: Minimum IoU to accept a detection-track
                 association during the first association step.
-            minimum_iou_threshold_second_assoc: Minimum IoU to accept a detection-track 
+            minimum_iou_threshold_second_assoc: Minimum IoU to accept a detection-track
                 association during the second association step.
             high_conf_det_threshold: Confidence threshold used to split detections into:
                 - high confidence: confidence >= threshold
                 - low confidence:  confidence < threshold
             enable_cmc: Whether to enable camera motion compensation (CMC).
-            cmc_method: CMC method string passed into `CMCConfig(method=...)`. 
-                Supported values depend on `CMC` (e.g. "orb", "sift", "sparseOptFlow", 
+            cmc_method: CMC method string passed into `CMCConfig(method=...)`.
+                Supported values depend on `CMC` (e.g. "orb", "sift", "sparseOptFlow",
                 "ecc"). See CMCConfig.
             cmc_downscale: Downscale factor used inside CMC for speed/robustness.
 
@@ -110,8 +110,11 @@ def __init__(
         self.tracks: list[BoTSORTKalmanBoxTracker] = []
 
         self.enable_cmc = enable_cmc
-        self.cmc = CMC(CMCConfig(method=cmc_method, 
-                                 downscale=cmc_downscale)) if enable_cmc else None
+        self.cmc = (
+            CMC(CMCConfig(method=cmc_method, downscale=cmc_downscale))
+            if enable_cmc
+            else None
+        )
 
     def _update_detections(
         self,
@@ -125,16 +128,16 @@ def _update_detections(
 
         For each (track_idx, det_idx) match:
         - Update the track's Kalman state with the detection bbox.
-        - If the track is “mature” (>= minimum_consecutive_frames) and still has 
+        - If the track is “mature” (>= minimum_consecutive_frames) and still has
           tracker_id == -1, assign a new unique tracker ID.
-        - Create a single-row `sv.Detections` object for the matched detection and set 
+        - Create a single-row `sv.Detections` object for the matched detection and set
           its tracker_id to the track ID (or -1 if not mature yet).
         - Append it to `updated_detections`.
 
         Args:
             tracks: Tracks being updated.
             detections: Detections used for update.
-            updated_detections: Accumulator list of per-detection outputs for this 
+            updated_detections: Accumulator list of per-detection outputs for this
                 frame.
             matched_indices: List of (track_row_index, detection_col_index) pairs.
 
@@ -172,9 +175,9 @@ def update(
 
         Args:
             detections: Supervision detections for the current frame. Must include `
-                .xyxy`. Confidence (`detections.confidence`) is optional but 
+                .xyxy`. Confidence (`detections.confidence`) is optional but
                 recommended. The method writes/overwrites `detections.tracker_id`.
-            frame: Current video frame in BGR format (H, W, 3), required if CMC is 
+            frame: Current video frame in BGR format (H, W, 3), required if CMC is
                 enabled.
 
         Returns:
@@ -185,8 +188,8 @@ def update(
                 mature)
 
         Notes:
-            - If CMC is enabled, the tracker estimates a global affine transform (2x3) 
-              from the frame and uses it to warp predicted track states before 
+            - If CMC is enabled, the tracker estimates a global affine transform (2x3)
+              from the frame and uses it to warp predicted track states before
               association.
         """
         if len(self.tracks) == 0 and len(detections) == 0:
@@ -194,23 +197,25 @@ def update(
             return detections
         updated_detections: list[
             sv.Detections
-        ] = []  # List for returning the updated detections with its new assigned 
-                # track id # noqa: E501
+        ] = []  # Accumulates the updated detections, each with its newly assigned
+        # track id.
 
         # Predict new locations for existing tracks
         for tracker in self.tracks:
             tracker.predict()
         # Assign a default tracker_id with the correct shape
         detections.tracker_id = -np.ones(len(detections))
-        # Split into high confidence boxes and lower based on 
-        # self.high_conf_det_threshold # noqa: E501
+        # Split into high confidence boxes and lower based on
+        # self.high_conf_det_threshold
         high_prob_detections, low_prob_detections = (
             self._get_high_and_low_probability_detections(detections)
         )
 
         # CMC (ORB) apply to all predicted tracks before association
         if self.enable_cmc and self.cmc is not None and frame is not None:
-            mask_boxes = high_prob_detections.xyxy if len(high_prob_detections) > 0 else None
+            mask_boxes = (
+                high_prob_detections.xyxy if len(high_prob_detections) > 0 else None
+            )
             H = self.cmc.estimate(frame, mask_boxes)
             self.cmc.apply_to_tracks(self.tracks, H)
 
@@ -219,7 +224,7 @@ def update(
             self._similarity_step(
                 high_prob_detections,
                 self.tracks,
-                self.minimum_iou_threshold_first_assoc
+                self.minimum_iou_threshold_first_assoc,
             )
         )
 
@@ -235,9 +240,9 @@ def update(
 
         # Step 2: associate Low Probability detections with remaining tracks
         matched_indices, unmatched_tracks, unmatched_detections = self._similarity_step(
-            low_prob_detections, 
-            remaining_tracks, 
-            self.minimum_iou_threshold_second_assoc
+            low_prob_detections,
+            remaining_tracks,
+            self.minimum_iou_threshold_second_assoc,
         )
 
         # Update matched tracks with low-confidence detections
@@ -274,10 +279,10 @@ def update(
         if len(final_updated_detections) == 0:
             final_updated_detections.tracker_id = np.array([], dtype=int)
         return final_updated_detections
-    
+
     def _get_high_and_low_probability_detections(
-            self, detections: sv.Detections
-        ) -> tuple[sv.Detections, sv.Detections]:
+        self, detections: sv.Detections
+    ) -> tuple[sv.Detections, sv.Detections]:
         """
         Split detections into high-confidence and low-confidence sets.
 
@@ -329,14 +334,14 @@ def _get_associated_indices(
         assignment problem in an optimal way.
 
         Args:
-            similarity_matrix: Similarity matrix between tracks (rows) and detections 
-            (columns). min_similarity_thresh: Minimum similarity threshold for a valid 
+            similarity_matrix: Similarity of tracks (rows) vs. detections (columns).
+            min_similarity_thresh: Minimum similarity threshold for a valid
             match.
 
         Returns:
             Matched indices (list of (tracker_idx, detection_idx)), indices of
                 unmatched tracks, indices of unmatched detections.
-        """  # noqa: E501
+        """
         matched_indices = []
         n_tracks, n_detections = similarity_matrix.shape
         unmatched_tracks = set(range(n_tracks))
@@ -400,9 +405,9 @@ def _similarity_step(
         self,
         detections: sv.Detections,
         tracks: list[BoTSORTKalmanBoxTracker],
-        thresh: float
+        thresh: float,
     ) -> tuple[list[tuple[int, int]], set[int], set[int]]:
-        """Measures similarity based on IoU between tracks and detections and returns 
+        """Measures similarity based on IoU between tracks and detections and returns
             the matches and unmatched tracks/detections. Is used for step 1 and 2 of the
             BYTE algorithm.
 
@@ -418,13 +423,13 @@ def _similarity_step(
                   were not matched.
                 - unmatched_detections_indices: A set of indices for detections
                   that were not matched.
-        """  # noqa: E501
+        """
         # Build IoU cost matrix between detections and predicted bounding boxes
         similarity_matrix = get_iou_matrix(tracks, detections.xyxy)
 
         # Associate detections to tracks based on the higher value of the
-        # similarity matrix, using the Jonker-Volgenant algorithm 
-        # (linear_sum_assignment). # noqa: E501
+        # similarity matrix, using the Jonker-Volgenant algorithm
+        # (linear_sum_assignment).
         matched_indices, unmatched_tracks, unmatched_detections = (
             self._get_associated_indices(similarity_matrix, thresh)
         )
diff --git a/trackers/core/botsort/utils.py b/trackers/core/botsort/utils.py
index 078542e4..3f4fdf23 100644
--- a/trackers/core/botsort/utils.py
+++ b/trackers/core/botsort/utils.py
@@ -13,9 +13,7 @@
 
 from trackers.core.botsort.kalman_box_tracker import BoTSORTKalmanBoxTracker
 
-KalmanBoxTrackerType = TypeVar(
-    "KalmanBoxTrackerType", bound=BoTSORTKalmanBoxTracker 
-)
+KalmanBoxTrackerType = TypeVar("KalmanBoxTrackerType", bound=BoTSORTKalmanBoxTracker)
 
 BoTSORTKalmanBoxTracker
 

From 3e1c18f4d09da28d5a676f14e275c95aa9a85e6e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 25 Mar 2026 15:04:00 +0100
Subject: [PATCH 10/14] =?UTF-8?q?chore(pre=5Fcommit):=20=E2=AC=86=20pre=5F?=
 =?UTF-8?q?commit=20autoupdate=20(#333)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.15.6 → v0.15.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.15.6...v0.15.7)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0168ef5f..73b34531 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -34,7 +34,7 @@ repos:
       - id: mixed-line-ending
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.15.6
+    rev: v0.15.7
     hooks:
       - id: ruff-check
         args: [--fix]

From a77fbbdcf168544d7fa5543d1e6097700cd694b7 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 25 Mar 2026 15:09:34 +0100
Subject: [PATCH 11/14] :arrow_up: Bump uv from 0.10.10 to 0.10.12 (#332)

Bumps [uv](https://github.com/astral-sh/uv) from 0.10.10 to 0.10.12.
- [Release notes](https://github.com/astral-sh/uv/releases)
- [Changelog](https://github.com/astral-sh/uv/blob/main/CHANGELOG.md)
- [Commits](https://github.com/astral-sh/uv/compare/0.10.10...0.10.12)

---
updated-dependencies:
- dependency-name: uv
  dependency-version: 0.10.12
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Piotr Skalski <piotr.skalski92@gmail.com>
---
 uv.lock | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/uv.lock b/uv.lock
index e93d28d6..4a8bb04d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -3957,28 +3957,28 @@ wheels = [
 
 [[package]]
 name = "uv"
-version = "0.10.10"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/77/22/21476e738938bbb36fa0029d369c6989ade90039110a7013a24f4c6211c0/uv-0.10.10.tar.gz", hash = "sha256:266b24bf85aa021af37d3fb22d84ef40746bc4da402e737e365b12badff60e89", size = 3976117, upload-time = "2026-03-13T20:04:44.335Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7a/2b/2cbc9ebc53dc84ad698c31583735605eb55627109af59d9d3424eb824935/uv-0.10.10-py3-none-linux_armv6l.whl", hash = "sha256:2c89017c0532224dc1ec6f3be1bc4ec3d8c3f291c23a229e8a40e3cc5828f599", size = 22712805, upload-time = "2026-03-13T20:03:36.034Z" },
-    { url = "https://files.pythonhosted.org/packages/14/44/4e8db982a986a08808cc5236e73c12bd6619823b3be41c9d6322d4746ebd/uv-0.10.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ee47b5bc1b8ccd246a3801611b2b71c8107db3a2b528e64463d737fd8e4f2798", size = 21857826, upload-time = "2026-03-13T20:03:52.852Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/98/aca12549cafc4c0346b04f8fed7f7ee3bfc2231b45b7e59d062d5b519746/uv-0.10.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:009a4c534e83bada52c8e2cccea6250e3486d01d609e4eb874cd302e2e534269", size = 20381437, upload-time = "2026-03-13T20:04:00.735Z" },
-    { url = "https://files.pythonhosted.org/packages/93/c4/f3f832e4871b2bb86423c4cdbbd40b10c835a426449e86951f992d63120a/uv-0.10.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:5dd85cc8ff9fa967c02c3edbf2b77d54b56bedcb56b323edec0df101f37f26e2", size = 22334006, upload-time = "2026-03-13T20:04:32.887Z" },
-    { url = "https://files.pythonhosted.org/packages/75/e1/852d1eb2630410f465287e858c93b2f2c81b668b7fa63c3f05356896706d/uv-0.10.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:49235f8a745ef10eea24b2f07be1ee77da056792cef897630b78c391c5f1e2e4", size = 22303994, upload-time = "2026-03-13T20:04:04.849Z" },
-    { url = "https://files.pythonhosted.org/packages/3f/39/1678ed510b7ee6d68048460c428ca26d57cc798ca34d4775e113e7801144/uv-0.10.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f97709570158efc87d52ddca90f2c96293eea382d81be295b1fd7088153d6a83", size = 22301619, upload-time = "2026-03-13T20:03:40.56Z" },
-    { url = "https://files.pythonhosted.org/packages/81/2f/e4137b7f3f07c0cc1597b49c341b30f09cea13dbe57cd83ad14f5839dfff/uv-0.10.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c863fb46a62f3c8a1b7bc1520b0939c05cf4fab06e7233fc48ed17538e6601e", size = 23669879, upload-time = "2026-03-13T20:04:20.356Z" },
-    { url = "https://files.pythonhosted.org/packages/ff/11/44f7f067b7dcfc57e21500918a50e0f2d56b23acdc9b2148dbd4d07b5078/uv-0.10.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f56734baf7a8bd616da69cd7effe1a237c2cb364ec4feefe6a4b180f1cf5ec2", size = 24480854, upload-time = "2026-03-13T20:03:31.645Z" },
-    { url = "https://files.pythonhosted.org/packages/9c/b5/d2bed329892b5298c493709bc851346d9750bafed51f8ba2b31e7d3ae0cc/uv-0.10.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1085cc907a1315002015bc218cc88e42c5171a03a705421341cdb420400ee2f3", size = 23677933, upload-time = "2026-03-13T20:03:57.052Z" },
-    { url = "https://files.pythonhosted.org/packages/02/95/84166104b968c02c2bb54c32082d702d29beb24384fb3f13ade0cb2456fb/uv-0.10.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e42e9e4a196ef75d1089715574eb1fe9bb62d390da05c6c8b36650a4de23d59f", size = 23473055, upload-time = "2026-03-13T20:03:48.648Z" },
-    { url = "https://files.pythonhosted.org/packages/b9/b6/9cc6e5442e3734615b5dbf45dcacf94cd46a05b1d04066cbdb992701e6bf/uv-0.10.10-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:fbd827042dbdcadeb5e3418bee73ded9feb5ead8edac23e6e1b5dadb5a90f8b2", size = 22403569, upload-time = "2026-03-13T20:04:08.514Z" },
-    { url = "https://files.pythonhosted.org/packages/cf/8c/2e0a3690603e86f8470bae3a27896a9f8b56677b5cd337d131c4d594e0dc/uv-0.10.10-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:41a3cc94e0c43070e48a521b6b26156ffde1cdc2088339891aa35eb2245ac5cf", size = 23309789, upload-time = "2026-03-13T20:03:44.764Z" },
-    { url = "https://files.pythonhosted.org/packages/24/e5/5af4d7426e39d7a7a751f8d1a7646d04e042a3c2c2c6aeb9d940ddc34df0/uv-0.10.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:8a59c80ade3aa20baf9ec5d17b6449f4fdba9212f6e3d1bdf2a6db94cbc64c21", size = 23329370, upload-time = "2026-03-13T20:04:24.525Z" },
-    { url = "https://files.pythonhosted.org/packages/3a/10/94b773933cd2e39aa9768dd11f85f32844e4dcb687c6df0714dfb3c0234a/uv-0.10.10-py3-none-musllinux_1_1_i686.whl", hash = "sha256:e77e52ba74e0085a1c03a16611146c6f813034787f83a2fd260cdc8357e18d2d", size = 22818945, upload-time = "2026-03-13T20:04:29.064Z" },
-    { url = "https://files.pythonhosted.org/packages/85/71/6fb74f35ef3afdb6b3f77e35a29a571a5c789e89d97ec5cb7fd1285eb48e/uv-0.10.10-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:4f9fd7f62df91c2d91c02e2039d4c5bad825077d04ebd27af8ea35a8cc736daf", size = 23667652, upload-time = "2026-03-13T20:04:41.239Z" },
-    { url = "https://files.pythonhosted.org/packages/df/7b/3042f2fb5bf7288cbe7f954ca64badb1243bbac207c0119b4a2cef561564/uv-0.10.10-py3-none-win32.whl", hash = "sha256:52e8b70a4fd7a734833c6a55714b679a10b29cf69b2e663e657df1995cf11c6a", size = 21778937, upload-time = "2026-03-13T20:04:37.11Z" },
-    { url = "https://files.pythonhosted.org/packages/89/c8/d314c4aab369aa105959a6b266e3e082a1252b8517564ea7a28b439726a2/uv-0.10.10-py3-none-win_amd64.whl", hash = "sha256:3da90c197e8e9f5d49862556fa9f4a9dd5b8617c0bbcc88585664e777209a315", size = 24176234, upload-time = "2026-03-13T20:04:16.406Z" },
-    { url = "https://files.pythonhosted.org/packages/e8/89/ea5852f4dadf01d6490131e5be88b2e12ea85b9cd5ffdc2efc933a3b6892/uv-0.10.10-py3-none-win_arm64.whl", hash = "sha256:3873b965d62b282ab51e328f4b15a760b32b11a7231dc3fe658fa11d98f20136", size = 22561685, upload-time = "2026-03-13T20:04:12.36Z" },
+version = "0.10.12"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8d/b7/6a27678654caa7f2240d9c5be9bd032bfff90a58858f0078575e7a9b6d9f/uv-0.10.12.tar.gz", hash = "sha256:fa722691c7ae5c023778ad0b040ab8619367bcfe44fd0d9e05a58751af86cdf8", size = 3988720, upload-time = "2026-03-19T21:50:41.015Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fe/aa/dde1b7300f8e924606ab0fe192aa25ca79736c5883ee40310ba8a5b34042/uv-0.10.12-py3-none-linux_armv6l.whl", hash = "sha256:7099bdefffbe2df81accad52579657b8f9f870170caa779049c9fd82d645c9b3", size = 22662810, upload-time = "2026-03-19T21:50:43.108Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/90/4fd10d7337a084847403cdbff288395a6a12adbaaac975943df4f46c2d31/uv-0.10.12-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:e0f0ef58f0ba6fbfaf5f91b67aad6852252c49b8f78015a2a5800cf74c7538d5", size = 21852701, upload-time = "2026-03-19T21:51:06.216Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/db/c41ace81b8ef5d5952433df38e321c0b6e5f88ce210c508b14f84817963f/uv-0.10.12-py3-none-macosx_11_0_arm64.whl", hash = "sha256:551f799d53e397843b6cde7e3c61de716fb487da512a21a954b7d0cbc06967e0", size = 20454594, upload-time = "2026-03-19T21:50:53.693Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/07/a990708c5ba064b4eb1a289f1e9c484ebf5c1a0ea8cad049c86625f3b467/uv-0.10.12-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:a5afe619e8a861fe4d49df8e10d2c6963de0dac6b79350c4832bf3366c8496cf", size = 22212546, upload-time = "2026-03-19T21:51:08.76Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/26/7f5ac4af027846c24bd7bf0edbd48b805f9e7daec145c62c632b5ce94e5f/uv-0.10.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:8dc352c93a47a4760cf824c31c55ce26511af780481e8f67c796d2779acaa928", size = 22278457, upload-time = "2026-03-19T21:51:19.895Z" },
+    { url = "https://files.pythonhosted.org/packages/02/00/c9043c73fb958482c9b42ad39ba81d1bd1ceffef11c4757412cb17f12316/uv-0.10.12-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd84379292e3c1a1bf0a05847c7c72b66bb581dccf8da1ef94cc82bf517efa7c", size = 22239751, upload-time = "2026-03-19T21:50:51.25Z" },
+    { url = "https://files.pythonhosted.org/packages/5c/d1/31fe74bf2a049446dd95213890ffed98f733d0f5e3badafec59164951608/uv-0.10.12-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2ace05115bd9ee1b30d341728257fe051817c4c0a652c085c90d4bd4fb0bc8f2", size = 23697005, upload-time = "2026-03-19T21:50:48.767Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/9a/dd58ef59e622a1651e181ec5b7d304ae482e591f28a864c474d09ea00aff/uv-0.10.12-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be85acae8f31c68311505cd96202bad43165cbd7be110c59222f918677e93248", size = 24453680, upload-time = "2026-03-19T21:51:11.443Z" },
+    { url = "https://files.pythonhosted.org/packages/09/26/b5920b43d7c91e720b72feaf81ea8575fa6188b626607695199fb9a0b683/uv-0.10.12-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2bb5893d79179727253e4a283871a693d7773c662a534fb897aa65496aa35765", size = 23570067, upload-time = "2026-03-19T21:51:13.976Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/42/139e68d7d92bb90a33b5e269dbe474acb00b6c9797541032f859c5bf4c4d/uv-0.10.12-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101481a1f48db6becf219914a591a588c0b3bfd05bef90768a5d04972bd6455e", size = 23498314, upload-time = "2026-03-19T21:50:36.104Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/75/40b237d005e4cdef9f960c215d3e2c0ab4f459ca009c3800cdcb07fbaa1d/uv-0.10.12-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:384b7f36a1ae50efe5f50fe299f276a83bf7acc8b7147517f34e27103270f016", size = 22314017, upload-time = "2026-03-19T21:50:56.45Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/c3/e65a6d795d5baf6fc113ff764650cc6dd792d745ff23f657e4c302877365/uv-0.10.12-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:2c21e1b36c384f75dd3fd4a818b04871158ce115efff0bb4fdcd18ba2df7bd48", size = 23321597, upload-time = "2026-03-19T21:50:39.012Z" },
+    { url = "https://files.pythonhosted.org/packages/65/ad/00f561b90b0ddfd1d591a78299fdeae68566e9cf82a4913548e4b700afef/uv-0.10.12-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:006812a086fce03d230fc987299f7295c7a73d17a1f1c17de1d1f327826f8481", size = 23336447, upload-time = "2026-03-19T21:50:58.764Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/6e/ddf50c9ad12cffa99dbb6d1ab920da8ba95e510982cf53df3424e8cbc228/uv-0.10.12-py3-none-musllinux_1_1_i686.whl", hash = "sha256:2c5dfc7560453186e911c8c2e4ce95cd1c91e1c5926c3b34c5a825a307217be9", size = 22855873, upload-time = "2026-03-19T21:51:01.13Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/9a/31a9c2f939849e56039bbe962aef6fb960df68c31bebd834d956876decfc/uv-0.10.12-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:b9ca1d264059cb016c853ebbc4f21c72d983e0f347c927ca29e283aec2f596cf", size = 23675276, upload-time = "2026-03-19T21:51:17.262Z" },
+    { url = "https://files.pythonhosted.org/packages/81/83/9225e3032f24fcb3b80ff97bbd4c28230de19f0f6b25dbad3ba6efda035e/uv-0.10.12-py3-none-win32.whl", hash = "sha256:cca36540d637c80d11d8a44a998a068355f0c78b75ec6b0f152ecbf89dfdd67b", size = 21739726, upload-time = "2026-03-19T21:50:46.155Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/9c/1954092ce17c00a8c299d39f8121e4c8d60f22a69c103f34d8b8dc68444d/uv-0.10.12-py3-none-win_amd64.whl", hash = "sha256:76ebe11572409dfbe20ec25a823f9bc8781400ece5356aa33ec44903af7ec316", size = 24219668, upload-time = "2026-03-19T21:51:03.591Z" },
+    { url = "https://files.pythonhosted.org/packages/37/92/9ca420deb5a7b6716d8746e1b05eb2c35a305ff3b4aa57061919087d82dd/uv-0.10.12-py3-none-win_arm64.whl", hash = "sha256:6727e3a0208059cd4d621684e580d5e254322dacbd806e0d218360abd0d48a68", size = 22544602, upload-time = "2026-03-19T21:51:22.678Z" },
 ]
 
 [[package]]

From 4341c0b8413f0e1aa8c661e72d8b14cca55103af Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 25 Mar 2026 16:15:55 +0100
Subject: [PATCH 12/14] :arrow_up: Bump inference-models from 0.20.1 to 0.22.0
 (#331)

Bumps inference-models from 0.20.1 to 0.22.0.

---
updated-dependencies:
- dependency-name: inference-models
  dependency-version: 0.22.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Piotr Skalski <piotr.skalski92@gmail.com>
---
 uv.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/uv.lock b/uv.lock
index 4a8bb04d..78ae1354 100644
--- a/uv.lock
+++ b/uv.lock
@@ -934,7 +934,7 @@ wheels = [
 
 [[package]]
 name = "inference-models"
-version = "0.20.1"
+version = "0.22.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "accelerate" },
@@ -968,9 +968,9 @@ dependencies = [
     { name = "torchvision" },
     { name = "transformers" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/31/2c/fbb5d52bfb477a5bbbf433f9ddbaecdda799d2c12d77acc31cd1dc731b9a/inference_models-0.20.1.tar.gz", hash = "sha256:71f74139b5d9db717a32c8f153151972fd1e6a8771252154b626fcdf36aa1aa9", size = 1652212, upload-time = "2026-03-12T15:49:00.66Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/7f/ed/91c7da89709edd53a30d3b2b5d5ac91fe947a78ea284c6ec16ac189edaec/inference_models-0.22.0.tar.gz", hash = "sha256:06dec33ee9a868f1e8261e34e60e1b15fd2c3d8126d5cdf0ae8ebdfde8e1ff30", size = 1660412, upload-time = "2026-03-20T18:04:48.988Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/dd/39/971f866d150e1aff85e45fc8a935c2229d8c2d2284ebc98097c67e0f6b97/inference_models-0.20.1-py3-none-any.whl", hash = "sha256:5e2787c43c47ed938748f309a95f04c7ecb74374db7bc27b7936329a330a01e8", size = 1817133, upload-time = "2026-03-12T15:48:58.749Z" },
+    { url = "https://files.pythonhosted.org/packages/06/99/18215129b667c46a993f89e9b37da25cd0b982b0a3a296269fed7a18d276/inference_models-0.22.0-py3-none-any.whl", hash = "sha256:6839111d5dd5b5403404b55e182fa3b1c44cd38c5a26430c589b7a0021ece126", size = 1827857, upload-time = "2026-03-20T18:04:47.399Z" },
 ]
 
 [[package]]

From 148613ae234679a8aa06f15562c11e6dbe347df1 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 25 Mar 2026 16:26:49 +0100
Subject: [PATCH 13/14] :arrow_up: Bump mkdocs-material from 9.7.5 to 9.7.6
 (#330)

Bumps [mkdocs-material](https://github.com/squidfunk/mkdocs-material) from 9.7.5 to 9.7.6.
- [Release notes](https://github.com/squidfunk/mkdocs-material/releases)
- [Changelog](https://github.com/squidfunk/mkdocs-material/blob/master/CHANGELOG)
- [Commits](https://github.com/squidfunk/mkdocs-material/compare/9.7.5...9.7.6)

---
updated-dependencies:
- dependency-name: mkdocs-material
  dependency-version: 9.7.6
  dependency-type: direct:development
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Piotr Skalski <piotr.skalski92@gmail.com>
---
 uv.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/uv.lock b/uv.lock
index 78ae1354..651054bf 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1414,7 +1414,7 @@ wheels = [
 
 [[package]]
 name = "mkdocs-material"
-version = "9.7.5"
+version = "9.7.6"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "babel" },
@@ -1429,9 +1429,9 @@ dependencies = [
     { name = "pymdown-extensions" },
     { name = "requests" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/74/76/5c202fecdc45d53e83e03a85bae70c48b6c81e9f87f0bc19a9e9c723bdc0/mkdocs_material-9.7.5.tar.gz", hash = "sha256:f76bdab532bad1d9c57ca7187b37eccf64dd12e1586909307f8856db3be384ea", size = 4097749, upload-time = "2026-03-10T15:43:22.809Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/45/29/6d2bcf41ae40802c4beda2432396fff97b8456fb496371d1bc7aad6512ec/mkdocs_material-9.7.6.tar.gz", hash = "sha256:00bdde50574f776d328b1862fe65daeaf581ec309bd150f7bff345a098c64a69", size = 4097959, upload-time = "2026-03-19T15:41:58.161Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/45/e1/e8080dcfa95cca267662a6f4afe29237452bdeb5a2a6555ac83646d21915/mkdocs_material-9.7.5-py3-none-any.whl", hash = "sha256:7cf9df2ff121fd098ff6e05c732b0be3699afca9642e2dfe4926c40eb5873eec", size = 9305251, upload-time = "2026-03-10T15:43:19.089Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/01/bc663630c510822c95c47a66af9fa7a443c295b47d5f041e5e6ae62ef659/mkdocs_material-9.7.6-py3-none-any.whl", hash = "sha256:71b84353921b8ea1ba84fe11c50912cc512da8fe0881038fcc9a0761c0e635ba", size = 9305470, upload-time = "2026-03-19T15:41:55.217Z" },
 ]
 
 [[package]]

From 62e46b973729a04f7e98925fe0beff9ff9751939 Mon Sep 17 00:00:00 2001
From: Piotr Skalski <piotr.skalski92@gmail.com>
Date: Thu, 26 Mar 2026 13:20:27 +0100
Subject: [PATCH 14/14] feat(botsort): add integration tests and fix ruff/mypy
 errors (#335)

---
 pyproject.toml                              |  2 +
 test/core/test_tracker_integration.py       |  2 +-
 test/data/tracker_expected_dancetrack.json  |  6 +++
 test/data/tracker_expected_sportsmot.json   |  6 +++
 trackers/__init__.py                        |  2 +
 trackers/core/botsort/cmc.py                | 44 +++++++++++----------
 trackers/core/botsort/kalman_box_tracker.py |  6 +--
 trackers/core/botsort/tracker.py            | 24 ++++++-----
 8 files changed, 58 insertions(+), 34 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c662c6ee..c625c582 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -196,5 +196,7 @@ module = [
     "torchvision",
     "torchvision.transforms",
     "firerequests",
+    "scipy",
+    "scipy.optimize",
 ]
 ignore_missing_imports = true
diff --git a/test/core/test_tracker_integration.py b/test/core/test_tracker_integration.py
index 31c4688b..b00e8119 100644
--- a/test/core/test_tracker_integration.py
+++ b/test/core/test_tracker_integration.py
@@ -17,7 +17,7 @@
 from trackers.eval import evaluate_mot_sequences
 from trackers.io.mot import _load_mot_file, _mot_frame_to_detections, _MOTOutput
 
-_TRACKER_IDS = ["sort", "bytetrack", "ocsort"]
+_TRACKER_IDS = ["sort", "bytetrack", "ocsort", "botsort"]
 _METRICS = ["CLEAR", "HOTA", "Identity"]
 _TEST_DATA_DIR = Path(__file__).resolve().parent.parent / "data"
 
diff --git a/test/data/tracker_expected_dancetrack.json b/test/data/tracker_expected_dancetrack.json
index ebf5990b..c5c60aa8 100644
--- a/test/data/tracker_expected_dancetrack.json
+++ b/test/data/tracker_expected_dancetrack.json
@@ -16,5 +16,11 @@
         "MOTA": 98.187,
         "IDF1": 74.367,
         "IDSW": 631
+    },
+    "botsort": {
+        "HOTA": 79.999,
+        "MOTA": 99.511,
+        "IDF1": 76.389,
+        "IDSW": 614
     }
 }
diff --git a/test/data/tracker_expected_sportsmot.json b/test/data/tracker_expected_sportsmot.json
index fadde849..08c658a6 100644
--- a/test/data/tracker_expected_sportsmot.json
+++ b/test/data/tracker_expected_sportsmot.json
@@ -16,5 +16,11 @@
         "MOTA": 97.791,
         "IDF1": 79.21,
         "IDSW": 917
+    },
+    "botsort": {
+        "HOTA": 85.544,
+        "MOTA": 98.925,
+        "IDF1": 80.53,
+        "IDSW": 1107
     }
 }
diff --git a/trackers/__init__.py b/trackers/__init__.py
index 31c646df..a34ba7a9 100644
--- a/trackers/__init__.py
+++ b/trackers/__init__.py
@@ -7,6 +7,7 @@
 from __future__ import annotations
 
 from trackers.annotators.trace import MotionAwareTraceAnnotator
+from trackers.core.botsort.tracker import BoTSORTTracker
 from trackers.core.bytetrack.tracker import ByteTrackTracker
 from trackers.core.ocsort.tracker import OCSORTTracker
 from trackers.core.sort.tracker import SORTTracker
@@ -22,6 +23,7 @@
 from trackers.utils.converters import xcycsr_to_xyxy, xyxy_to_xcycsr
 
 __all__ = [
+    "BoTSORTTracker",
     "ByteTrackTracker",
     "CoordinatesTransformation",
     "Dataset",
diff --git a/trackers/core/botsort/cmc.py b/trackers/core/botsort/cmc.py
index 8deb976b..f04526ee 100644
--- a/trackers/core/botsort/cmc.py
+++ b/trackers/core/botsort/cmc.py
@@ -230,16 +230,16 @@ def __init__(self, cfg: CMCConfig | None = None) -> None:
         self.extractor = None
         self.matcher = None
         if self.cfg.method == "orb":
-            self.detector = cv2.FastFeatureDetector_create(self.cfg.fast_threshold)
-            self.extractor = cv2.ORB_create()
+            self.detector = cv2.FastFeatureDetector_create(self.cfg.fast_threshold)  # type: ignore[attr-defined]
+            self.extractor = cv2.ORB_create()  # type: ignore[attr-defined]
             self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
         elif self.cfg.method == "sift":
-            self.detector = cv2.SIFT_create(
+            self.detector = cv2.SIFT_create(  # type: ignore[attr-defined]
                 nOctaveLayers=self.cfg.sift_n_octave_layers,
                 contrastThreshold=self.cfg.sift_contrast_threshold,
                 edgeThreshold=int(self.cfg.sift_edge_threshold),
             )
-            self.extractor = cv2.SIFT_create(
+            self.extractor = cv2.SIFT_create(  # type: ignore[attr-defined]
                 nOctaveLayers=self.cfg.sift_n_octave_layers,
                 contrastThreshold=self.cfg.sift_contrast_threshold,
                 edgeThreshold=int(self.cfg.sift_edge_threshold),
@@ -369,8 +369,8 @@ def _estimate_feature_affine(
                     mask[y1b:y2b, x1b:x2b] = 0
 
         # Detect + describe (ORB)
-        kps = self.detector.detect(gray, mask)
-        kps, desc = self.extractor.compute(gray, kps)
+        kps = self.detector.detect(gray, mask)  # type: ignore[union-attr]
+        kps, desc = self.extractor.compute(gray, kps)  # type: ignore[union-attr]
 
         H_aff = np.eye(2, 3, dtype=np.float32)
 
@@ -386,7 +386,7 @@ def _estimate_feature_affine(
             self._prev_desc = None if desc is None else copy.copy(desc)
             return H_aff
 
-        knn = self.matcher.knnMatch(self._prev_desc, desc, k=2)
+        knn = self.matcher.knnMatch(self._prev_desc, desc, k=2)  # type: ignore[union-attr]
         if len(knn) == 0:
             self._prev_kps = copy.copy(kps)
             self._prev_desc = copy.copy(desc)
@@ -405,7 +405,7 @@ def _estimate_feature_affine(
                 continue
             m, n = pair
             if m.distance < 0.9 * n.distance:
-                p_prev = np.array(self._prev_kps[m.queryIdx].pt, dtype=np.float32)
+                p_prev = np.array(self._prev_kps[m.queryIdx].pt, dtype=np.float32)  # type: ignore[index]
                 p_curr = np.array(kps[m.trainIdx].pt, dtype=np.float32)
                 d = p_prev - p_curr
                 if (abs(d[0]) < max_spatial[0]) and (abs(d[1]) < max_spatial[1]):
@@ -414,12 +414,12 @@ def _estimate_feature_affine(
                     curr_pts.append(p_curr)
 
         if len(prev_pts) >= 5:
-            spatial = np.asarray(spatial, dtype=np.float32)
-            mean = spatial.mean(axis=0)
-            std = spatial.std(axis=0) + 1e-6
+            spatial_arr = np.asarray(spatial, dtype=np.float32)
+            mean = spatial_arr.mean(axis=0)
+            std = spatial_arr.std(axis=0) + 1e-6
             inl = np.logical_and(
-                np.abs(spatial[:, 0] - mean[0]) < 2.5 * std[0],
-                np.abs(spatial[:, 1] - mean[1]) < 2.5 * std[1],
+                np.abs(spatial_arr[:, 0] - mean[0]) < 2.5 * std[0],
+                np.abs(spatial_arr[:, 1] - mean[1]) < 2.5 * std[1],
             )
             prev_pts_np = np.asarray(prev_pts, dtype=np.float32)[inl]
             curr_pts_np = np.asarray(curr_pts, dtype=np.float32)[inl]
@@ -471,7 +471,7 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
             )
 
         # Find keypoints in current frame
-        keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)
+        keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)  # type: ignore[call-overload]
 
         # First frame: init and return identity
         if not self._initialized:
@@ -492,7 +492,7 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
 
         # Optical flow correspondences
         # calcOpticalFlowPyrLK will throw or return nonsense if we give it None
-        matched, status, _err = cv2.calcOpticalFlowPyrLK(
+        matched, status, _err = cv2.calcOpticalFlowPyrLK(  # type: ignore[call-overload]
             self._prev_frame_gray, frame, self._prev_points, None
         )
 
@@ -512,14 +512,16 @@ def _estimate_sparse_optflow(self, frame_bgr: np.ndarray) -> np.ndarray:
                 prev_pts.append(self._prev_points[i])
                 curr_pts.append(matched[i])
 
-        prev_pts = np.array(prev_pts)
-        curr_pts = np.array(curr_pts)
+        prev_pts_arr = np.array(prev_pts)
+        curr_pts_arr = np.array(curr_pts)
 
         # Find rigid matrix
-        if (np.size(prev_pts, 0) > 4) and (
-            np.size(prev_pts, 0) == np.size(curr_pts, 0)
+        if (np.size(prev_pts_arr, 0) > 4) and (
+            np.size(prev_pts_arr, 0) == np.size(curr_pts_arr, 0)
         ):
-            H_est, _ = cv2.estimateAffinePartial2D(prev_pts, curr_pts, cv2.RANSAC)
+            H_est, _ = cv2.estimateAffinePartial2D(  # type: ignore[call-overload]
+                prev_pts_arr, curr_pts_arr, cv2.RANSAC
+            )
             if H_est is not None:
                 H_aff = H_est.astype(np.float32)
 
@@ -587,7 +589,7 @@ def _estimate_ecc(self, frame_bgr: np.ndarray) -> np.ndarray:
             return H_aff
 
         try:
-            _cc, H_est = cv2.findTransformECC(
+            _cc, H_est = cv2.findTransformECC(  # type: ignore[call-overload]
                 self._prev_frame_gray,
                 frame,
                 H_aff,
diff --git a/trackers/core/botsort/kalman_box_tracker.py b/trackers/core/botsort/kalman_box_tracker.py
index c5d53769..f38a1939 100644
--- a/trackers/core/botsort/kalman_box_tracker.py
+++ b/trackers/core/botsort/kalman_box_tracker.py
@@ -360,10 +360,10 @@ def predict(self) -> None:
         self._update_process_and_measurement_noise()
 
         # Predict state
-        self.state = self.F @ self.state
+        self.state = self.F @ self.state  # type: ignore[assignment]
 
         # Predict error (uncertainty) covariance
-        self.P = self.F @ self.P @ self.F.T + self.Q
+        self.P = self.F @ self.P @ self.F.T + self.Q  # type: ignore[assignment]
 
         # Prevent degenerate box shape
         self.state[2, 0] = max(self.state[2, 0], 1e-3)
@@ -437,7 +437,7 @@ def update(self, bbox: np.ndarray) -> None:
 
         # Update covariance
         identity_matrix = np.eye(8, dtype=np.float32)
-        self.P = (identity_matrix - K @ self.H) @ self.P
+        self.P = (identity_matrix - K @ self.H) @ self.P  # type: ignore[assignment]
 
         self.state[2, 0] = max(self.state[2, 0], 1e-3)
         self.state[3, 0] = max(self.state[3, 0], 1e-3)
diff --git a/trackers/core/botsort/tracker.py b/trackers/core/botsort/tracker.py
index f4d35842..696280c0 100644
--- a/trackers/core/botsort/tracker.py
+++ b/trackers/core/botsort/tracker.py
@@ -5,7 +5,7 @@
 # ------------------------------------------------------------------------
 
 from copy import deepcopy
-from typing import cast
+from typing import Literal, cast
 
 import numpy as np
 import supervision as sv
@@ -55,6 +55,8 @@ class BoTSORTTracker(BaseTracker):
         cmc: Camera motion compensation instance (or None if disabled).
     """
 
+    tracker_id = "botsort"
+
     def __init__(
         self,
         lost_track_buffer: int = 30,
@@ -65,7 +67,7 @@ def __init__(
         minimum_iou_threshold_second_assoc: float = 0.5,
         high_conf_det_threshold: float = 0.6,
         enable_cmc: bool = True,
-        cmc_method: str = "sparseOptFlow",
+        cmc_method: Literal["orb", "sift", "sparseOptFlow", "ecc"] = "sparseOptFlow",
         cmc_downscale: int = 2,
     ) -> None:
         """
@@ -163,10 +165,10 @@ def _update_detections(
             updated_detections.append(new_det)
         return updated_detections
 
-    def update(
+    def update(  # type: ignore[override]
         self,
         detections: sv.Detections,
-        frame: np.ndarray,
+        frame: np.ndarray | None = None,
     ) -> sv.Detections:
         """
         Update the tracker with detections from the current frame.
@@ -255,7 +257,10 @@ def update(
 
         # Add unmatched low prob predictions to updated predictions
         for det_index in unmatched_detections:
-            new_det = deepcopy(low_prob_detections[det_index : det_index + 1])
+            new_det = cast(
+                sv.Detections,
+                deepcopy(low_prob_detections[det_index : det_index + 1]),
+            )
 
             new_det.tracker_id = np.array([-1])
             updated_detections.append(new_det)
@@ -309,16 +314,17 @@ def _get_high_and_low_probability_detections(
         """
 
         if detections.confidence is None:
-            # If no confidence information exists, treat all detections as high-confidence
-            return detections, detections[:0]
+            # If no confidence information exists, treat all detections
+            # as high-confidence
+            return detections, cast(sv.Detections, detections[:0])
 
         conf = detections.confidence
 
         high_mask = conf >= self.high_conf_det_threshold
         low_mask = (conf > 0.1) & (conf < self.high_conf_det_threshold)
 
-        high_confidence = detections[high_mask]
-        low_confidence = detections[low_mask]
+        high_confidence = cast(sv.Detections, detections[high_mask])
+        low_confidence = cast(sv.Detections, detections[low_mask])
 
         return high_confidence, low_confidence