diff --git a/docs/source/guides/performance_metrics.md b/docs/source/guides/performance_metrics.md
index 6280028d..d5fb7f31 100644
--- a/docs/source/guides/performance_metrics.md
+++ b/docs/source/guides/performance_metrics.md
@@ -16,6 +16,8 @@ Performance metrics are automatically collected during model inference and inclu
 - **Total frames**: Total number of inferences
 - **FPS**: Frames Per Second
 
+All timing statistics are reported in milliseconds (ms).
+
 Each metric provides statistical information including mean, standard deviation, and individual measurements.
 
 ## Basic Usage
@@ -59,21 +61,21 @@ This will output detailed performance information:
 ============================================================
 
 📊 Model Loading:
-   Load Time: 2.497s
+   Load Time: 2497.00 ms
 
 ⚙️  Processing Times (mean ± std):
-   Preprocess:  0.001s ± 0.000s
-   Inference:   0.570s ± 0.020s
-   Postprocess: 0.001s ± 0.000s
+   Preprocess:  1.00 ms ± 0.10 ms
+   Inference:   570.00 ms ± 20.00 ms
+   Postprocess: 1.00 ms ± 0.10 ms
 
 📈 Total Time Statistics:
-   Mean:  0.572s ± 0.020s
-   Min:   0.556s
-   Max:   0.642s
+   Mean:  572.00 ms ± 20.00 ms
+   Min:   556.00 ms
+   Max:   642.00 ms
 
 🎯 Performance Summary:
    Total Frames: 100
    FPS:          1.75
 ============================================================
 ```
 
@@ -94,9 +96,9 @@ total_min_time = metrics.get_total_time_min()
 total_max_time = metrics.get_total_time_max()
 
 # Access statistical information
-print(f"Mean inference time: {inference_time.mean():.3f} seconds")
-print(f"Standard deviation: {inference_time.stddev():.3f} seconds")
-print(f"Total inference time: {inference_time.time:.3f} seconds")
+print(f"Mean inference time: {inference_time.mean():.2f} ms")
+print(f"Standard deviation: {inference_time.stddev():.2f} ms")
+print(f"Total inference time: {inference_time.time:.2f} ms")
 print(f"Number of inferences: {inference_time.count}")
 ```
 
@@ -150,7 +152,7 @@ for i in range(100):
     if (i + 1) % 10 == 0:
         metrics = model.get_performance_metrics()
         print(f"After {i + 1} inferences:")
-        print(f"  Mean inference time: {metrics.get_inference_time().mean():.3f}s")
+        print(f"  Mean inference time: {metrics.get_inference_time().mean():.2f} ms")
         print(f"  Current FPS: {metrics.get_fps():.2f}")
 ```
 
@@ -166,13 +168,12 @@ metrics = model.get_performance_metrics()
 preprocess_time = metrics.get_preprocess_time().mean()
 inference_time = metrics.get_inference_time().mean()
 postprocess_time = metrics.get_postprocess_time().mean()
+total = preprocess_time + inference_time + postprocess_time
 
 print("Time breakdown:")
-print(f"  Preprocessing: {preprocess_time:.3f}s ({preprocess_time/total:.1%})")
-print(f"  Inference:     {inference_time:.3f}s ({inference_time/total:.1%})")
-print(f"  Postprocessing: {postprocess_time:.3f}s ({postprocess_time/total:.1%})")
-
-total = preprocess_time + inference_time + postprocess_time
+print(f"  Preprocessing: {preprocess_time:.2f} ms ({preprocess_time/total:.1%})")
+print(f"  Inference:     {inference_time:.2f} ms ({inference_time/total:.1%})")
+print(f"  Postprocessing: {postprocess_time:.2f} ms ({postprocess_time/total:.1%})")
 ```
 
 ### Warm-up Considerations
@@ -184,8 +185,12 @@ The first few inferences may be slower due to system warm-up. Consider excluding
 for _ in range(5):
     model(image)
 
-# Reset metrics after warm-up
-model.get_performance_metrics().reset()
+# Reset metrics after warm-up (load time stats are preserved by default)
+metrics = model.get_performance_metrics()
+metrics.reset()
+
+# To also clear the model load-time measurements, call this instead:
+# metrics.reset(include_load_time=True)
 
 # Now measure actual performance
 for _ in range(100):
@@ -227,8 +232,9 @@ def analyze_model_performance(model_path, test_images, warmup_runs=5, test_runs=
     for _ in range(warmup_runs):
         model(image)
 
-    # Reset metrics after warm-up
-    model.get_performance_metrics().reset()
+    # Reset metrics after warm-up (keeping load time by default)
+    metrics = model.get_performance_metrics()
+    metrics.reset()
 
     print(f"Running {test_runs} test inferences...")
     # Performance measurement runs
@@ -252,9 +258,11 @@ def analyze_model_performance(model_path, test_images, warmup_runs=5, test_runs=
     # Additional analysis
     inference_time = metrics.get_inference_time()
     print(f"\nInference time analysis:")
-    print(f"  Minimum: {min(inference_time.durations):.3f}s")
-    print(f"  Maximum: {max(inference_time.durations):.3f}s")
-    print(f"  Median: {sorted(inference_time.durations)[len(inference_time.durations)//2]:.3f}s")
+    print(f"  Minimum: {min(inference_time.durations):.2f} ms")
+    print(f"  Maximum: {max(inference_time.durations):.2f} ms")
+    print(
+        f"  Median: {sorted(inference_time.durations)[len(inference_time.durations)//2]:.2f} ms"
+    )
 
     return metrics
 
diff --git a/examples/metrics/benchmark.py b/examples/metrics/benchmark.py
index 6c896527..690efb35 100644
--- a/examples/metrics/benchmark.py
+++ b/examples/metrics/benchmark.py
@@ -31,12 +31,201 @@ def get_image_files(dataset_path: str) -> list[str]:
     return test_images
 
 
-def analyze_model_performance(model_path, test_images, device, warmup_runs, test_runs):
+def analyze_model_performance(  # noqa: C901
+    model_path,
+    test_images,
+    device,
+    warmup_runs,
+    test_runs,
+    num_streams=None,
+    num_threads=None,
+    max_num_requests=0,
+    performance_hint=None,
+    performance_hint_num_requests=None,
+    use_async=False,
+    inference_only=False,
+):
     """Complete performance analysis example."""
 
-    # Load model
-    model = Model.create_model(model_path, device=device)
+    # Import required components
+    import time
 
+    import numpy as np
+    from openvino import Core
+
+    from model_api.adapters.openvino_adapter import OpenvinoAdapter, get_user_config
+
+    # Build plugin config
+    core = Core()
+    plugin_config = get_user_config(
+        device,
+        num_streams or "1",
+        num_threads,
+    )
+
+    # Add performance hint if specified
+    if performance_hint:
+        plugin_config["PERFORMANCE_HINT"] = performance_hint
+    if performance_hint_num_requests is not None:
+        plugin_config["PERFORMANCE_HINT_NUM_REQUESTS"] = str(performance_hint_num_requests)
+
+    # Create adapter with custom plugin config
+    inference_adapter = OpenvinoAdapter(
+        core=core,
+        model=model_path,
+        device=device,
+        plugin_config=plugin_config,
+        max_num_requests=max_num_requests,
+    )
+    model = Model.create_model(inference_adapter, preload=True)
+
+    if inference_only:
+        # Inference-only mode: use pre-filled random data, no I/O or preprocessing
+        print("Benchmarking in inference only mode (inputs filling are not included in measurement loop).")
+
+        # Read the first input's name, shape, precision and layout from the model wrapper
+        # (the tensor shape itself is derived from the compiled model input further below)
+        input_layer_name = next(iter(model.inputs.keys()))
+        input_shape = model.inputs[input_layer_name].shape
+        input_precision = model.inputs[input_layer_name].precision
+        input_layout = model.inputs[input_layer_name].layout
+
+        print(f"Input: {input_layer_name}, shape: {input_shape}, precision: {input_precision}, layout: {input_layout}")
+
+        # Get actual compiled model input to check for embedded preprocessing
+        compiled_inputs = inference_adapter.compiled_model.inputs
+        actual_input = compiled_inputs[0]
+        actual_partial_shape = actual_input.partial_shape
+        actual_layout = str(actual_input.layout) if hasattr(actual_input, "layout") else str(input_layout)
+
+        print(f"Compiled model input partial_shape: {actual_partial_shape}, layout: {actual_layout}")
+
+        # Create random tensor data matching the actual compiled model input
+        # If shape has dynamic dimensions, use reasonable defaults
+        tensor_shape = []
+        for i, dim in enumerate(actual_partial_shape):
+            if dim.is_dynamic:
+                # Dynamic dimension - use reasonable default
+                if i == 0:
+                    tensor_shape.append(1)  # Batch
+                elif i == 3 and actual_layout == "NHWC":
+                    tensor_shape.append(3)  # Channels for NHWC
+                else:
+                    tensor_shape.append(224)  # Height/Width default
+            else:
+                tensor_shape.append(dim.get_length())
+
+        print(f"Using tensor shape: {tensor_shape}")
+
+        # Create random tensor data using Generator API
+        rng = np.random.default_rng()
+        if input_precision == "FP32" or input_precision == "f32":
+            random_tensor = rng.standard_normal(tensor_shape).astype(np.float32)
+        elif input_precision == "FP16" or input_precision == "f16":
+            random_tensor = rng.standard_normal(tensor_shape).astype(np.float16)
+        elif input_precision == "U8" or input_precision == "u8":
+            random_tensor = rng.integers(0, 256, size=tensor_shape, dtype=np.uint8)
+        elif input_precision == "I8" or input_precision == "i8":
+            random_tensor = rng.integers(-128, 128, size=tensor_shape, dtype=np.int8)
+        else:
+            random_tensor = rng.standard_normal(tensor_shape).astype(np.float32)
+
+        # Create dict_data directly without preprocessing
+        dict_data = {input_layer_name: random_tensor}
+
+        print(f"Raw tensor shape: {random_tensor.shape}, dtype: {random_tensor.dtype}")
+
+        print("Starting warm-up...")
+        # Warm-up using raw inference - bypass model wrapper, use adapter directly
+        for _ in range(warmup_runs):
+            inference_adapter.infer_sync(dict_data)
+
+        if use_async:
+            print(f"Running {test_runs} test inferences (async mode, inference only)...")
+            completed_count = [0]  # Use list to allow modification in callback
+            first_start_time = [None]
+            last_end_time = [None]
+
+            def callback(request, userdata):
+                completed_count[0] += 1
+                last_end_time[0] = time.perf_counter()
+
+            # Set callback on the inference adapter directly
+            inference_adapter.set_callback(callback)
+
+            # Measure from first submission to last completion
+            first_start_time[0] = time.perf_counter()
+
+            # Submit all inference requests
+            for i in range(test_runs):
+                inference_adapter.infer_async(dict_data, i)
+                if (i + 1) % 10 == 0:
+                    print(f"  Submitted {i + 1}/{test_runs}")
+
+            # Wait for all requests to complete
+            print("  Waiting for all requests to complete...")
+            inference_adapter.await_all()
+
+            total_duration = (last_end_time[0] - first_start_time[0]) * 1000  # Convert to ms
+
+            print(f"  Completed {completed_count[0]}/{test_runs}")
+
+            # Wall time per request; requests may overlap, so this is not a true per-request latency
+            avg_latency = total_duration / test_runs
+            throughput = test_runs / (total_duration / 1000)
+
+            print("\n" + "=" * 60)
+            print("           🚀 PERFORMANCE METRICS REPORT 🚀           ")
+            print("=" * 60)
+            print(f"\n📊 Execution Devices: [{device}]")
+            print(f"   Count: {test_runs} iterations")
+            print(f"   Duration: {total_duration:.2f} ms")
+            print("\n📈 Latency:")
+            print(f"   Average: {avg_latency:.2f} ms")
+            print(f"\n🎯 Throughput: {throughput:.2f} FPS")
+            print("=" * 60 + "\n")
+
+        else:
+            print(f"Running {test_runs} test inferences (sync mode, inference only)...")
+            inference_times = []
+
+            # Measure each inference separately for latency statistics
+            for i in range(test_runs):
+                start_time = time.perf_counter()
+                inference_adapter.infer_sync(dict_data)
+                end_time = time.perf_counter()
+
+                inference_times.append((end_time - start_time) * 1000)  # Convert to ms
+
+                if (i + 1) % 10 == 0:
+                    print(f"  Completed {i + 1}/{test_runs}")
+
+            # Calculate statistics
+            inference_times = np.array(inference_times)
+            total_duration = np.sum(inference_times)
+            mean_latency = np.mean(inference_times)
+            median_latency = np.median(inference_times)
+            min_latency = np.min(inference_times)
+            max_latency = np.max(inference_times)
+            throughput = test_runs / (total_duration / 1000)
+
+            print("\n" + "=" * 60)
+            print("           🚀 PERFORMANCE METRICS REPORT 🚀           ")
+            print("=" * 60)
+            print(f"\n📊 Execution Devices: [{device}]")
+            print(f"   Count: {test_runs} iterations")
+            print(f"   Duration: {total_duration:.2f} ms")
+            print("\n📈 Latency:")
+            print(f"   Median:  {median_latency:.2f} ms")
+            print(f"   Average: {mean_latency:.2f} ms")
+            print(f"   Min:     {min_latency:.2f} ms")
+            print(f"   Max:     {max_latency:.2f} ms")
+            print(f"\n🎯 Throughput: {throughput:.2f} FPS")
+            print("=" * 60 + "\n")
+
+        return None
+
+    # Regular mode with image loading and full preprocessing
     # Load test image
     image = cv2.imread(test_images[0])
 
@@ -48,14 +237,37 @@ def analyze_model_performance(model_path, test_images, device, warmup_runs, test
     # Reset metrics after warm-up
     model.get_performance_metrics().reset()
 
-    print(f"Running {test_runs} test inferences...")
-    # Performance measurement runs
-    for i, image_path in enumerate(test_images[:test_runs]):
-        image = cv2.imread(image_path)
-        model(image)
-        # Log progress
-        if (i + 1) % 10 == 0:
-            print(f"  Completed {i + 1}/{test_runs}")
+    if use_async:
+        print(f"Running {test_runs} test inferences (async mode)...")
+        # Async inference mode - submit all requests concurrently
+        results = {}
+
+        def callback(result, userdata):
+            results[userdata] = result
+
+        model.set_callback(callback)
+
+        # Submit all inference requests without blocking
+        for i, image_path in enumerate(test_images[:test_runs]):
+            image = cv2.imread(image_path)
+            model.infer_async(image, user_data=i)
+            # Log progress
+            if (i + 1) % 10 == 0:
+                print(f"  Submitted {i + 1}/{test_runs}")
+
+        # Wait for all requests to complete
+        print("  Waiting for all requests to complete...")
+        model.await_all()
+        print(f"  Completed {len(results)}/{test_runs}")
+    else:
+        print(f"Running {test_runs} test inferences (sync mode)...")
+        # Synchronous inference mode - process images sequentially
+        for i, image_path in enumerate(test_images[:test_runs]):
+            image = cv2.imread(image_path)
+            model(image)
+            # Log progress
+            if (i + 1) % 10 == 0:
+                print(f"  Completed {i + 1}/{test_runs}")
 
     # Analyze results
     metrics = model.get_performance_metrics()
@@ -72,6 +284,46 @@ def main():
     parser.add_argument("--warmup-runs", type=int, default=5, help="Number of warmup runs (default: 5)")
     parser.add_argument("--test-runs", type=int, default=100, help="Number of test runs (default: 100)")
 
+    # OpenVINO performance tuning options
+    parser.add_argument(
+        "--performance-hint",
+        type=str,
+        choices=["LATENCY", "THROUGHPUT", "CUMULATIVE_THROUGHPUT"],
+        help="OpenVINO performance hint (LATENCY, THROUGHPUT, CUMULATIVE_THROUGHPUT)",
+    )
+    parser.add_argument(
+        "--num-streams",
+        type=str,
+        help="Number of inference streams (e.g., '1', '2', 'NUM_STREAMS_AUTO')",
+    )
+    parser.add_argument(
+        "--num-threads",
+        type=int,
+        help="Number of CPU threads for inference",
+    )
+    parser.add_argument(
+        "--max-num-requests",
+        type=int,
+        default=0,
+        help="Maximum number of infer requests for asynchronous inference (default: 0 = auto)",
+    )
+    parser.add_argument(
+        "--performance-hint-num-requests",
+        type=int,
+        help="Number of requests for performance hint optimization",
+    )
+    parser.add_argument(
+        "--async",
+        dest="use_async",
+        action="store_true",
+        help="Use asynchronous inference mode for concurrent request submission (recommended for THROUGHPUT mode)",
+    )
+    parser.add_argument(
+        "--inference-only",
+        action="store_true",
+        help="Use inference-only mode with pre-filled random data (no image I/O or preprocessing overhead)",
+    )
+
     # Show help if no arguments are provided
     if len(sys.argv) == 1:
         parser.print_help()
@@ -82,16 +334,31 @@ def main():
     model_path = args.model_path
     dataset_path = args.dataset_path
 
-    # Get list of image files from the directory
-    test_images = get_image_files(dataset_path)
-
-    print(f"Found {len(test_images)} images in {dataset_path}")
-
-    if not test_images:
-        print("Error: No images found in the dataset directory!")
-        exit(1)
+    # Get list of image files from the directory (only needed if not in inference-only mode)
+    if not args.inference_only:
+        test_images = get_image_files(dataset_path)
+        print(f"Found {len(test_images)} images in {dataset_path}")
+        if not test_images:
+            print("Error: No images found in the dataset directory!")
+            exit(1)
+    else:
+        # Inference-only mode doesn't need actual images
+        test_images = None
 
-    analyze_model_performance(model_path, test_images, args.device, args.warmup_runs, args.test_runs)
+    analyze_model_performance(
+        model_path,
+        test_images,
+        args.device,
+        args.warmup_runs,
+        args.test_runs,
+        num_streams=args.num_streams,
+        num_threads=args.num_threads,
+        max_num_requests=args.max_num_requests,
+        performance_hint=args.performance_hint,
+        performance_hint_num_requests=args.performance_hint_num_requests,
+        use_async=args.use_async,
+        inference_only=args.inference_only,
+    )
 
 
 if __name__ == "__main__":
diff --git a/src/model_api/metrics/performance.py b/src/model_api/metrics/performance.py
index 3504ee17..ebf8a169 100644
--- a/src/model_api/metrics/performance.py
+++ b/src/model_api/metrics/performance.py
@@ -5,7 +5,7 @@
 
 import logging
 
-from .time_stat import TimeStat
+from .time_stat import MS_IN_SECOND, TimeStat
 
 logger = logging.getLogger(__name__)
 
@@ -37,12 +37,19 @@ def __add__(self, other):
         new_metrics.preprocess_time = self.preprocess_time + other.preprocess_time
         new_metrics.inference_time = self.inference_time + other.inference_time
         new_metrics.postprocess_time = self.postprocess_time + other.postprocess_time
+        new_metrics.total_time = self.total_time + other.total_time
         return new_metrics
 
-    def reset(self) -> None:
+    def reset(self, include_load_time: bool = False) -> None:
         """
         Resets performance metrics to the initial state.
+
+        Args:
+            include_load_time (bool, optional): Whether to reset the load time statistics as well.
+                Defaults to False to keep model load measurements available across runs.
         """
+        if include_load_time:
+            self.load_time.reset()
         self.preprocess_time.reset()
         self.inference_time.reset()
         self.postprocess_time.reset()
@@ -84,6 +91,10 @@ def get_postprocess_time(self) -> TimeStat:
         """
         return self.postprocess_time
 
+    def get_total_time(self) -> TimeStat:
+        """Returns the total pipeline time statistics."""
+        return self.total_time
+
     def get_total_frames(self) -> int:
         """
         Returns the total number of frames processed.
@@ -100,7 +111,10 @@ def get_fps(self) -> float:
         Returns:
             float: Frames Per Second.
         """
-        return self.get_total_frames() / sum(self.total_time.durations) if sum(self.total_time.durations) > 0 else 0.0
+        total_time_ms = self.total_time.time
+        if total_time_ms <= 0:
+            return 0.0
+        return self.get_total_frames() / (total_time_ms / MS_IN_SECOND)
 
     def get_total_time_min(self) -> float:
         """
@@ -132,17 +146,17 @@ def log_metrics(self) -> None:
             "=" * 60,
             "",
             "📊 Model Loading:",
-            f"   Load Time: {self.load_time.mean():.3f}s",
+            f"   Load Time: {self.load_time.mean():.2f} ms",
             "",
             "⚙️  Processing Times (mean ± std):",
-            f"   Preprocess:  {self.preprocess_time.mean():.3f}s ± {self.preprocess_time.stddev():.3f}s",
-            f"   Inference:   {self.inference_time.mean():.3f}s ± {self.inference_time.stddev():.3f}s",
-            f"   Postprocess: {self.postprocess_time.mean():.3f}s ± {self.postprocess_time.stddev():.3f}s",
+            f"   Preprocess:  {self.preprocess_time.mean():.2f} ms ± {self.preprocess_time.stddev():.2f} ms",
+            f"   Inference:   {self.inference_time.mean():.2f} ms ± {self.inference_time.stddev():.2f} ms",
+            f"   Postprocess: {self.postprocess_time.mean():.2f} ms ± {self.postprocess_time.stddev():.2f} ms",
             "",
             "📈 Total Time Statistics:",
-            f"   Mean:  {self.total_time.mean():.3f}s ± {self.total_time.stddev():.3f}s",
-            f"   Min:   {self.get_total_time_min():.3f}s",
-            f"   Max:   {self.get_total_time_max():.3f}s",
+            f"   Mean:  {self.total_time.mean():.2f} ms ± {self.total_time.stddev():.2f} ms",
+            f"   Min:   {self.get_total_time_min():.2f} ms",
+            f"   Max:   {self.get_total_time_max():.2f} ms",
             "",
             "🎯 Performance Summary:",
             f"   Total Frames: {self.get_total_frames():,}",
diff --git a/src/model_api/metrics/time_stat.py b/src/model_api/metrics/time_stat.py
index 02657118..ea098a2d 100644
--- a/src/model_api/metrics/time_stat.py
+++ b/src/model_api/metrics/time_stat.py
@@ -4,6 +4,11 @@
 #
 
 from time import perf_counter
+from typing import Any
+
+MS_IN_SECOND = 1000.0
+
+_DEFAULT_TOKEN = object()
 
 
 class TimeStat:
@@ -18,7 +23,7 @@ def __init__(self):
         self.time = 0.0
         self.durations = []
         self.count = 0
-        self.last_update_time = None
+        self._active_tokens: dict[Any, float] = {}
 
     def __add__(self, other):
         """
@@ -36,19 +41,29 @@ def __add__(self, other):
         new_stat.count = self.count + other.count
         return new_stat
 
-    def update(self) -> None:
+    def update(self, token: Any | None = None) -> Any:
         """
         Updates the statistics with the latest duration.
+
+        Args:
+            token: Key pairing a start call with its stop call; None uses one shared default slot.
+
+        Returns:
+            Any: The token associated with the current timing segment.
         """
+
+        key = token if token is not None else _DEFAULT_TOKEN
         time = perf_counter()
-        if self.last_update_time:
-            diff = time - self.last_update_time
-            self.time += diff
-            self.durations.append(diff)
-            self.count += 1
-            self.last_update_time = None
-        else:
-            self.last_update_time = time
+        start_time = self._active_tokens.pop(key, None)
+        if start_time is None:
+            self._active_tokens[key] = time
+            return key
+
+        diff = (time - start_time) * MS_IN_SECOND
+        self.time += diff
+        self.durations.append(diff)
+        self.count += 1
+        return key
 
     def reset(self) -> None:
         """
@@ -57,7 +72,7 @@ def reset(self) -> None:
         self.time = 0.0
         self.durations = []
         self.count = 0
-        self.last_update_time = None
+        self._active_tokens.clear()
 
     def mean(self) -> float:
         """
diff --git a/src/model_api/models/model.py b/src/model_api/models/model.py
index b6bf79c4..15153586 100644
--- a/src/model_api/models/model.py
+++ b/src/model_api/models/model.py
@@ -553,11 +553,13 @@ def infer_async(self, input_data: dict, user_data: Any):
                 "The model is not loaded to the device. Please, create the wrapper "
                 "with preload=True option or call load() method before infer_async()",
             )
-        self.perf.total_time.update()
+        total_token = object()
+        inference_token = object()
+        self.perf.total_time.update(total_token)
         self.perf.preprocess_time.update()
         dict_data, meta = self.preprocess(input_data)
         self.perf.preprocess_time.update()
-        self.perf.inference_time.update()
+        self.perf.inference_time.update(inference_token)
         self.inference_adapter.infer_async(
             dict_data,
             (
@@ -567,6 +569,8 @@ def infer_async(self, input_data: dict, user_data: Any):
                 self.postprocess,
                 self.callback_fn,
                 user_data,
+                total_token,
+                inference_token,
             ),
         )
 
@@ -575,13 +579,33 @@ def _process_callback(request, callback_data: Any):
         """
         A wrapper for async inference callback.
         """
-        model, meta, get_result_fn, postprocess_fn, callback_fn, user_data = callback_data
+        total_token = None
+        inference_token = None
+        if len(callback_data) >= 8:
+            (
+                model,
+                meta,
+                get_result_fn,
+                postprocess_fn,
+                callback_fn,
+                user_data,
+                total_token,
+                inference_token,
+            ) = callback_data
+        else:
+            model, meta, get_result_fn, postprocess_fn, callback_fn, user_data = callback_data
         raw_result = get_result_fn(request)
-        model.perf.inference_time.update()
+        if inference_token is not None:
+            model.perf.inference_time.update(inference_token)
+        else:
+            model.perf.inference_time.update()
         model.perf.postprocess_time.update()
         result = postprocess_fn(raw_result, meta)
         model.perf.postprocess_time.update()
-        model.perf.total_time.update()
+        if total_token is not None:
+            model.perf.total_time.update(total_token)
+        else:
+            model.perf.total_time.update()
         callback_fn(result, user_data)
 
     def set_callback(self, callback_fn: Callable):
diff --git a/tests/unit/metrics/test_performancemetrics.py b/tests/unit/metrics/test_performancemetrics.py
index b95c6bf7..decef9dd 100644
--- a/tests/unit/metrics/test_performancemetrics.py
+++ b/tests/unit/metrics/test_performancemetrics.py
@@ -64,6 +64,16 @@ def test_reset(self):
         assert self.metrics.postprocess_time.durations == []
         assert self.metrics.total_time.durations == []
 
+    def test_reset_including_load_time(self):
+        """Test reset clears load time when requested."""
+        self.metrics.load_time.time = 5.0
+        self.metrics.load_time.durations = [5.0]
+
+        self.metrics.reset(include_load_time=True)
+
+        assert self.metrics.load_time.time == 0.0
+        assert self.metrics.load_time.durations == []
+
     def test_get_load_time(self):
         """Test get_load_time method returns the correct TimeStat object."""
         self.metrics.load_time.time = 1.23
@@ -96,6 +106,11 @@ def test_get_postprocess_time(self):
         assert isinstance(postprocess_time, TimeStat)
         assert postprocess_time.time == 4.56
 
+    def test_get_total_time(self):
+        """Test get_total_time returns the total TimeStat object."""
+        total_time = self.metrics.get_total_time()
+        assert total_time is self.metrics.total_time
+
     def test_get_total_frames_empty(self):
         """Test get_total_frames returns 0 when no frames processed."""
         assert self.metrics.get_total_frames() == 0
@@ -112,7 +127,8 @@ def test_get_fps_no_data(self):
     def test_get_fps_with_data(self):
         """Test get_fps calculates correctly when frames are processed."""
         self.metrics.total_time.durations = [1.0, 2.0, 3.0]
-        expected_fps = 3 / 6.0
+        self.metrics.total_time.time = 6.0
+        expected_fps = 3 / (6.0 / 1000.0)
         assert abs(self.metrics.get_fps() - expected_fps) < 1e-7
 
     def test_get_fps_zero_total_time(self):
@@ -142,6 +158,10 @@ def test_add_valid_metrics(self):
         metrics2.inference_time.durations = [2.5]
         metrics2.postprocess_time.time = 3.5
         metrics2.postprocess_time.durations = [3.5]
+        metrics1.total_time.time = 6.0
+        metrics1.total_time.durations = [6.0]
+        metrics2.total_time.time = 4.0
+        metrics2.total_time.durations = [4.0]
 
         result = metrics1 + metrics2
 
@@ -150,10 +170,12 @@ def test_add_valid_metrics(self):
         assert result.preprocess_time.time == 3.5
         assert result.inference_time.time == 5.5
         assert result.postprocess_time.time == 7.5
+        assert result.total_time.time == 10.0
         assert result.load_time.durations == [1.0, 0.5]
         assert result.preprocess_time.durations == [2.0, 1.5]
         assert result.inference_time.durations == [3.0, 2.5]
         assert result.postprocess_time.durations == [4.0, 3.5]
+        assert result.total_time.durations == [6.0, 4.0]
 
     def test_add_invalid_type(self):
         """Test adding PerformanceMetrics with invalid type returns NotImplemented."""
@@ -166,21 +188,6 @@ def test_add_invalid_type(self):
         result = self.metrics.__add__(None)
         assert result == NotImplemented
 
-    def test_add_missing_total_time_in_result(self):
-        """Test that addition doesn't include total_time in the result."""
-        metrics1 = PerformanceMetrics()
-        metrics2 = PerformanceMetrics()
-
-        metrics1.total_time.time = 10.0
-        metrics1.total_time.durations = [10.0]
-        metrics2.total_time.time = 5.0
-        metrics2.total_time.durations = [5.0]
-
-        result = metrics1 + metrics2
-
-        assert result.total_time.time == 0.0
-        assert result.total_time.durations == []
-
     @patch("model_api.metrics.performance.logger")
     def test_log_metrics_empty(self, mock_logger):
         """Test log_metrics with empty metrics."""
@@ -193,13 +200,13 @@ def test_log_metrics_empty(self, mock_logger):
         logged_content = mock_logger.info.call_args[0][0]
 
         assert "🚀 PERFORMANCE METRICS REPORT 🚀" in logged_content
-        assert "Load Time: 0.000s" in logged_content
-        assert "Preprocess:  0.000s ± 0.000s" in logged_content
-        assert "Inference:   0.000s ± 0.000s" in logged_content
-        assert "Postprocess: 0.000s ± 0.000s" in logged_content
-        assert "Mean:  0.000s ± 0.000s" in logged_content
-        assert "Min:   0.000s" in logged_content
-        assert "Max:   0.000s" in logged_content
+        assert "Load Time: 0.00 ms" in logged_content
+        assert "Preprocess:  0.00 ms ± 0.00 ms" in logged_content
+        assert "Inference:   0.00 ms ± 0.00 ms" in logged_content
+        assert "Postprocess: 0.00 ms ± 0.00 ms" in logged_content
+        assert "Mean:  0.00 ms ± 0.00 ms" in logged_content
+        assert "Min:   0.00 ms" in logged_content
+        assert "Max:   0.00 ms" in logged_content
         assert "Total Frames: 0" in logged_content
         assert "FPS:          0.00" in logged_content
 
@@ -227,13 +234,13 @@ def test_log_metrics_with_data(self, mock_logger):
         logged_content = mock_logger.info.call_args[0][0]
 
         assert "🚀 PERFORMANCE METRICS REPORT 🚀" in logged_content
-        assert "Load Time: 1.234s" in logged_content
-        assert "Preprocess:  2.345s ± 0.123s" in logged_content
-        assert "Inference:   3.456s ± 0.234s" in logged_content
-        assert "Postprocess: 4.567s ± 0.345s" in logged_content
-        assert "Mean:  10.123s ± 0.456s" in logged_content
-        assert "Min:   1.000s" in logged_content
-        assert "Max:   3.000s" in logged_content
+        assert "Load Time: 1.23 ms" in logged_content
+        assert "Preprocess:  2.35 ms ± 0.12 ms" in logged_content
+        assert "Inference:   3.46 ms ± 0.23 ms" in logged_content
+        assert "Postprocess: 4.57 ms ± 0.34 ms" in logged_content  # 0.345 rounds to 0.34
+        assert "Mean:  10.12 ms ± 0.46 ms" in logged_content
+        assert "Min:   1.00 ms" in logged_content
+        assert "Max:   3.00 ms" in logged_content
         assert "Total Frames: 3" in logged_content
         assert "FPS:          12.34" in logged_content