diff --git a/docs/source/guides/performance_metrics.md b/docs/source/guides/performance_metrics.md index 6280028d..d5fb7f31 100644 --- a/docs/source/guides/performance_metrics.md +++ b/docs/source/guides/performance_metrics.md @@ -16,6 +16,8 @@ Performance metrics are automatically collected during model inference and inclu - **Total frames**: Total number of inferences - **FPS**: Frames Per Second +All timing statistics are reported in milliseconds (ms). + Each metric provides statistical information including mean, standard deviation, and individual measurements. ## Basic Usage @@ -59,21 +61,21 @@ This will output detailed performance information: ============================================================ 📊 Model Loading: - Load Time: 2.497s + Load Time: 2497.00 ms ⚙️ Processing Times (mean ± std): - Preprocess: 0.001s ± 0.000s - Inference: 0.570s ± 0.020s - Postprocess: 0.001s ± 0.000s + Preprocess: 1.00 ms ± 0.10 ms + Inference: 570.00 ms ± 20.00 ms + Postprocess: 1.00 ms ± 0.10 ms 📈 Total Time Statistics: - Mean: 0.572s ± 0.020s - Min: 0.556s - Max: 0.642s + Mean: 572.00 ms ± 20.00 ms + Min: 556.00 ms + Max: 642.00 ms 🎯 Performance Summary: - Total Frames: 100 - FPS: 1.75 + Total Frames: 100 + FPS: 1.75 ============================================================ ``` @@ -94,9 +96,9 @@ total_min_time = metrics.get_total_time_min() total_max_time = metrics.get_total_time_max() # Access statistical information -print(f"Mean inference time: {inference_time.mean():.3f} seconds") -print(f"Standard deviation: {inference_time.stddev():.3f} seconds") -print(f"Total inference time: {inference_time.time:.3f} seconds") +print(f"Mean inference time: {inference_time.mean():.2f} ms") +print(f"Standard deviation: {inference_time.stddev():.2f} ms") +print(f"Total inference time: {inference_time.time:.2f} ms") print(f"Number of inferences: {inference_time.count}") ``` @@ -150,7 +152,7 @@ for i in range(100): if (i + 1) % 10 == 0: metrics = model.get_performance_metrics() print(f"After {i + 1} inferences:") - print(f" Mean inference time: {metrics.get_inference_time().mean():.3f}s") + print(f" Mean inference time: {metrics.get_inference_time().mean():.2f} ms") print(f" Current FPS: {metrics.get_fps():.2f}") ``` @@ -166,13 +168,12 @@ metrics = model.get_performance_metrics() preprocess_time = metrics.get_preprocess_time().mean() inference_time = metrics.get_inference_time().mean() postprocess_time = metrics.get_postprocess_time().mean() +total = preprocess_time + inference_time + postprocess_time print("Time breakdown:") -print(f" Preprocessing: {preprocess_time:.3f}s ({preprocess_time/total:.1%})") -print(f" Inference: {inference_time:.3f}s ({inference_time/total:.1%})") -print(f" Postprocessing: {postprocess_time:.3f}s ({postprocess_time/total:.1%})") - -total = preprocess_time + inference_time + postprocess_time +print(f" Preprocessing: {preprocess_time:.2f} ms ({preprocess_time/total:.1%})") +print(f" Inference: {inference_time:.2f} ms ({inference_time/total:.1%})") +print(f" Postprocessing: {postprocess_time:.2f} ms ({postprocess_time/total:.1%})") ``` ### Warm-up Considerations @@ -184,8 +185,12 @@ The first few inferences may be slower due to system warm-up. Consider excluding for _ in range(5): model(image) -# Reset metrics after warm-up -model.get_performance_metrics().reset() +# Reset metrics after warm-up (load time stats are preserved by default) +metrics = model.get_performance_metrics() +metrics.reset() + +# If you also need to clear model load measurements +# metrics.reset(include_load_time=True) # Now measure actual performance for _ in range(100): @@ -227,8 +232,9 @@ def analyze_model_performance(model_path, test_images, warmup_runs=5, test_runs= for _ in range(warmup_runs): model(image) - # Reset metrics after warm-up - model.get_performance_metrics().reset() + # Reset metrics after warm-up (keeping load time by default) + metrics = model.get_performance_metrics() + metrics.reset() print(f"Running {test_runs} test inferences...") # Performance measurement runs @@ -252,9 +258,11 @@ def analyze_model_performance(model_path, test_images, warmup_runs=5, test_runs= # Additional analysis inference_time = metrics.get_inference_time() print(f"\nInference time analysis:") - print(f" Minimum: {min(inference_time.durations):.3f}s") - print(f" Maximum: {max(inference_time.durations):.3f}s") - print(f" Median: {sorted(inference_time.durations)[len(inference_time.durations)//2]:.3f}s") + print(f" Minimum: {min(inference_time.durations):.2f} ms") + print(f" Maximum: {max(inference_time.durations):.2f} ms") + print( + f" Median: {sorted(inference_time.durations)[len(inference_time.durations)//2]:.2f} ms" + ) return metrics diff --git a/examples/metrics/benchmark.py b/examples/metrics/benchmark.py index 6c896527..690efb35 100644 --- a/examples/metrics/benchmark.py +++ b/examples/metrics/benchmark.py @@ -31,12 +31,201 @@ def get_image_files(dataset_path: str) -> list[str]: return test_images -def analyze_model_performance(model_path, test_images, device, warmup_runs, test_runs): +def analyze_model_performance( # noqa: C901 + model_path, + test_images, + device, + warmup_runs, + test_runs, + num_streams=None, + num_threads=None, + max_num_requests=0, + performance_hint=None, + performance_hint_num_requests=None, + use_async=False, + inference_only=False, +): """Complete performance analysis example.""" - # Load model - model = Model.create_model(model_path, device=device) + # Import required components + import time + import numpy as np + from openvino import Core + + from model_api.adapters.openvino_adapter import OpenvinoAdapter, get_user_config + + # Build plugin config + core = Core() + plugin_config = get_user_config( + device, + num_streams or "1", + num_threads, + ) + + # Add performance hint if specified + if performance_hint: + plugin_config["PERFORMANCE_HINT"] = performance_hint + if performance_hint_num_requests is not None: + plugin_config["PERFORMANCE_HINT_NUM_REQUESTS"] = str(performance_hint_num_requests) + + # Create adapter with custom plugin config + inference_adapter = OpenvinoAdapter( + core=core, + model=model_path, + device=device, + plugin_config=plugin_config, + max_num_requests=max_num_requests, + ) + model = Model.create_model(inference_adapter, preload=True) + + if inference_only: + # Inference-only mode: use pre-filled random data, no I/O or preprocessing + print("Benchmarking in inference only mode (inputs filling are not included in measurement loop).") + + # Get input shape and create random data + # For image models, we need to create a proper image-like array + input_layer_name = next(iter(model.inputs.keys())) + input_shape = model.inputs[input_layer_name].shape + input_precision = model.inputs[input_layer_name].precision + input_layout = model.inputs[input_layer_name].layout + + print(f"Input: {input_layer_name}, shape: {input_shape}, precision: {input_precision}, layout: {input_layout}") + + # Get actual compiled model input to check for embedded preprocessing + compiled_inputs = inference_adapter.compiled_model.inputs + actual_input = compiled_inputs[0] + actual_partial_shape = actual_input.partial_shape + actual_layout = str(actual_input.layout) if hasattr(actual_input, "layout") else str(input_layout) + + print(f"Compiled model input partial_shape: {actual_partial_shape}, layout: {actual_layout}") + + # Create random tensor data matching the actual compiled model input + # If shape has dynamic dimensions, use reasonable defaults + tensor_shape = [] + for i, dim in enumerate(actual_partial_shape): + if dim.is_dynamic: + # Dynamic dimension - use reasonable default + if i == 0: + tensor_shape.append(1) # Batch + elif i == 3 and actual_layout == "NHWC": + tensor_shape.append(3) # Channels for NHWC + else: + tensor_shape.append(224) # Height/Width default + else: + tensor_shape.append(dim.get_length()) + + print(f"Using tensor shape: {tensor_shape}") + + # Create random tensor data using Generator API + rng = np.random.default_rng() + if input_precision == "FP32" or input_precision == "f32": + random_tensor = rng.standard_normal(tensor_shape).astype(np.float32) + elif input_precision == "FP16" or input_precision == "f16": + random_tensor = rng.standard_normal(tensor_shape).astype(np.float16) + elif input_precision == "U8" or input_precision == "u8": + random_tensor = rng.integers(0, 256, size=tensor_shape, dtype=np.uint8) + elif input_precision == "I8" or input_precision == "i8": + random_tensor = rng.integers(-128, 128, size=tensor_shape, dtype=np.int8) + else: + random_tensor = rng.standard_normal(tensor_shape).astype(np.float32) + + # Create dict_data directly without preprocessing + dict_data = {input_layer_name: random_tensor} + + print(f"Raw tensor shape: {random_tensor.shape}, dtype: {random_tensor.dtype}") + + print("Starting warm-up...") + # Warm-up using raw inference - bypass model wrapper, use adapter directly + for _ in range(warmup_runs): + inference_adapter.infer_sync(dict_data) + + if use_async: + print(f"Running {test_runs} test inferences (async mode, inference only)...") + completed_count = [0] # Use list to allow modification in callback + first_start_time = [None] + last_end_time = [None] + + def callback(request, userdata): + completed_count[0] += 1 + last_end_time[0] = time.perf_counter() + + # Set callback on the inference adapter directly + inference_adapter.set_callback(callback) + + # Measure from first submission to last completion + first_start_time[0] = time.perf_counter() + + # Submit all inference requests + for i in range(test_runs): + inference_adapter.infer_async(dict_data, i) + if (i + 1) % 10 == 0: + print(f" Submitted {i + 1}/{test_runs}") + + # Wait for all requests to complete + print(" Waiting for all requests to complete...") + inference_adapter.await_all() + + total_duration = (last_end_time[0] - first_start_time[0]) * 1000 # Convert to ms + + print(f" Completed {completed_count[0]}/{test_runs}") + + # Calculate metrics + avg_latency = total_duration / test_runs + throughput = test_runs / (total_duration / 1000) + + print("\n" + "=" * 60) + print(" 🚀 PERFORMANCE METRICS REPORT 🚀 ") + print("=" * 60) + print(f"\n📊 Execution Devices: [{device}]") + print(f" Count: {test_runs} iterations") + print(f" Duration: {total_duration:.2f} ms") + print("\n📈 Latency:") + print(f" Average: {avg_latency:.2f} ms") + print(f"\n🎯 Throughput: {throughput:.2f} FPS") + print("=" * 60 + "\n") + + else: + print(f"Running {test_runs} test inferences (sync mode, inference only)...") + inference_times = [] + + # Measure each inference separately for latency statistics + for i in range(test_runs): + start_time = time.perf_counter() + inference_adapter.infer_sync(dict_data) + end_time = time.perf_counter() + + inference_times.append((end_time - start_time) * 1000) # Convert to ms + + if (i + 1) % 10 == 0: + print(f" Completed {i + 1}/{test_runs}") + + # Calculate statistics + inference_times = np.array(inference_times) + total_duration = np.sum(inference_times) + mean_latency = np.mean(inference_times) + median_latency = np.median(inference_times) + min_latency = np.min(inference_times) + max_latency = np.max(inference_times) + throughput = test_runs / (total_duration / 1000) + + print("\n" + "=" * 60) + print(" 🚀 PERFORMANCE METRICS REPORT 🚀 ") + print("=" * 60) + print(f"\n📊 Execution Devices: [{device}]") + print(f" Count: {test_runs} iterations") + print(f" Duration: {total_duration:.2f} ms") + print("\n📈 Latency:") + print(f" Median: {median_latency:.2f} ms") + print(f" Average: {mean_latency:.2f} ms") + print(f" Min: {min_latency:.2f} ms") + print(f" Max: {max_latency:.2f} ms") + print(f"\n🎯 Throughput: {throughput:.2f} FPS") + print("=" * 60 + "\n") + + return None + + # Regular mode with image loading and full preprocessing # Load test image image = cv2.imread(test_images[0]) @@ -48,14 +237,37 @@ def analyze_model_performance(model_path, test_images, device, warmup_runs, test # Reset metrics after warm-up model.get_performance_metrics().reset() - print(f"Running {test_runs} test inferences...") - # Performance measurement runs - for i, image_path in enumerate(test_images[:test_runs]): - image = cv2.imread(image_path) - model(image) - # Log progress - if (i + 1) % 10 == 0: - print(f" Completed {i + 1}/{test_runs}") + if use_async: + print(f"Running {test_runs} test inferences (async mode)...") + # Async inference mode - submit all requests concurrently + results = {} + + def callback(result, userdata): + results[userdata] = result + + model.set_callback(callback) + + # Submit all inference requests without blocking + for i, image_path in enumerate(test_images[:test_runs]): + image = cv2.imread(image_path) + model.infer_async(image, user_data=i) + # Log progress + if (i + 1) % 10 == 0: + print(f" Submitted {i + 1}/{test_runs}") + + # Wait for all requests to complete + print(" Waiting for all requests to complete...") + model.await_all() + print(f" Completed {len(results)}/{test_runs}") + else: + print(f"Running {test_runs} test inferences (sync mode)...") + # Synchronous inference mode - process images sequentially + for i, image_path in enumerate(test_images[:test_runs]): + image = cv2.imread(image_path) + model(image) + # Log progress + if (i + 1) % 10 == 0: + print(f" Completed {i + 1}/{test_runs}") # Analyze results metrics = model.get_performance_metrics() @@ -72,6 +284,46 @@ def main(): parser.add_argument("--warmup-runs", type=int, default=5, help="Number of warmup runs (default: 5)") parser.add_argument("--test-runs", type=int, default=100, help="Number of test runs (default: 100)") + # OpenVINO performance tuning options + parser.add_argument( + "--performance-hint", + type=str, + choices=["LATENCY", "THROUGHPUT", "CUMULATIVE_THROUGHPUT"], + help="OpenVINO performance hint (LATENCY, THROUGHPUT, CUMULATIVE_THROUGHPUT)", + ) + parser.add_argument( + "--num-streams", + type=str, + help="Number of inference streams (e.g., '1', '2', 'NUM_STREAMS_AUTO')", + ) + parser.add_argument( + "--num-threads", + type=int, + help="Number of CPU threads for inference", + ) + parser.add_argument( + "--max-num-requests", + type=int, + default=0, + help="Maximum number of infer requests for asynchronous inference (default: 0 = auto)", + ) + parser.add_argument( + "--performance-hint-num-requests", + type=int, + help="Number of requests for performance hint optimization", + ) + parser.add_argument( + "--async", + dest="use_async", + action="store_true", + help="Use asynchronous inference mode for concurrent request submission (recommended for THROUGHPUT mode)", + ) + parser.add_argument( + "--inference-only", + action="store_true", + help="Use inference-only mode with pre-filled random data (no image I/O or preprocessing overhead)", + ) + # Show help if no arguments are provided if len(sys.argv) == 1: parser.print_help() @@ -82,16 +334,31 @@ def main(): model_path = args.model_path dataset_path = args.dataset_path - # Get list of image files from the directory - test_images = get_image_files(dataset_path) - - print(f"Found {len(test_images)} images in {dataset_path}") - - if not test_images: - print("Error: No images found in the dataset directory!") - exit(1) + # Get list of image files from the directory (only needed if not in inference-only mode) + if not args.inference_only: + test_images = get_image_files(dataset_path) + print(f"Found {len(test_images)} images in {dataset_path}") + if not test_images: + print("Error: No images found in the dataset directory!") + exit(1) + else: + # Inference-only mode doesn't need actual images + test_images = None - analyze_model_performance(model_path, test_images, args.device, args.warmup_runs, args.test_runs) + analyze_model_performance( + model_path, + test_images, + args.device, + args.warmup_runs, + args.test_runs, + num_streams=args.num_streams, + num_threads=args.num_threads, + max_num_requests=args.max_num_requests, + performance_hint=args.performance_hint, + performance_hint_num_requests=args.performance_hint_num_requests, + use_async=args.use_async, + inference_only=args.inference_only, + ) if __name__ == "__main__": diff --git a/src/model_api/metrics/performance.py b/src/model_api/metrics/performance.py index 3504ee17..ebf8a169 100644 --- a/src/model_api/metrics/performance.py +++ b/src/model_api/metrics/performance.py @@ -5,7 +5,7 @@ import logging -from .time_stat import TimeStat +from .time_stat import MS_IN_SECOND, TimeStat logger = logging.getLogger(__name__) @@ -37,12 +37,19 @@ def __add__(self, other): new_metrics.preprocess_time = self.preprocess_time + other.preprocess_time new_metrics.inference_time = self.inference_time + other.inference_time new_metrics.postprocess_time = self.postprocess_time + other.postprocess_time + new_metrics.total_time = self.total_time + other.total_time return new_metrics - def reset(self) -> None: + def reset(self, include_load_time: bool = False) -> None: """ Resets performance metrics to the initial state. + + Args: + include_load_time (bool, optional): Whether to reset the load time statistics as well. + Defaults to False to keep model load measurements available across runs. """ + if include_load_time: + self.load_time.reset() self.preprocess_time.reset() self.inference_time.reset() self.postprocess_time.reset() @@ -84,6 +91,10 @@ def get_postprocess_time(self) -> TimeStat: """ return self.postprocess_time + def get_total_time(self) -> TimeStat: + """Returns the total pipeline time statistics.""" + return self.total_time + def get_total_frames(self) -> int: """ Returns the total number of frames processed. @@ -100,7 +111,10 @@ def get_fps(self) -> float: Returns: float: Frames Per Second. """ - return self.get_total_frames() / sum(self.total_time.durations) if sum(self.total_time.durations) > 0 else 0.0 + total_time_ms = self.total_time.time + if total_time_ms <= 0: + return 0.0 + return self.get_total_frames() / (total_time_ms / MS_IN_SECOND) def get_total_time_min(self) -> float: """ @@ -132,17 +146,17 @@ def log_metrics(self) -> None: "=" * 60, "", "📊 Model Loading:", - f" Load Time: {self.load_time.mean():.3f}s", + f" Load Time: {self.load_time.mean():.2f} ms", "", "⚙️ Processing Times (mean ± std):", - f" Preprocess: {self.preprocess_time.mean():.3f}s ± {self.preprocess_time.stddev():.3f}s", - f" Inference: {self.inference_time.mean():.3f}s ± {self.inference_time.stddev():.3f}s", - f" Postprocess: {self.postprocess_time.mean():.3f}s ± {self.postprocess_time.stddev():.3f}s", + f" Preprocess: {self.preprocess_time.mean():.2f} ms ± {self.preprocess_time.stddev():.2f} ms", + f" Inference: {self.inference_time.mean():.2f} ms ± {self.inference_time.stddev():.2f} ms", + f" Postprocess: {self.postprocess_time.mean():.2f} ms ± {self.postprocess_time.stddev():.2f} ms", "", "📈 Total Time Statistics:", - f" Mean: {self.total_time.mean():.3f}s ± {self.total_time.stddev():.3f}s", - f" Min: {self.get_total_time_min():.3f}s", - f" Max: {self.get_total_time_max():.3f}s", + f" Mean: {self.total_time.mean():.2f} ms ± {self.total_time.stddev():.2f} ms", + f" Min: {self.get_total_time_min():.2f} ms", + f" Max: {self.get_total_time_max():.2f} ms", "", "🎯 Performance Summary:", f" Total Frames: {self.get_total_frames():,}", diff --git a/src/model_api/metrics/time_stat.py b/src/model_api/metrics/time_stat.py index 02657118..ea098a2d 100644 --- a/src/model_api/metrics/time_stat.py +++ b/src/model_api/metrics/time_stat.py @@ -4,6 +4,11 @@ # from time import perf_counter +from typing import Any + +MS_IN_SECOND = 1000.0 + +_DEFAULT_TOKEN = object() class TimeStat: @@ -18,7 +23,7 @@ def __init__(self): self.time = 0.0 self.durations = [] self.count = 0 - self.last_update_time = None + self._active_tokens: dict[Any, float] = {} def __add__(self, other): """ @@ -36,19 +41,29 @@ def __add__(self, other): new_stat.count = self.count + other.count return new_stat - def update(self) -> None: + def update(self, token: Any | None = None) -> Any: """ Updates the statistics with the latest duration. + + Args: + token: Identifier for asynchronous measurements. + + Returns: + Any: The token associated with the current timing segment. """ + + key = token if token is not None else _DEFAULT_TOKEN time = perf_counter() - if self.last_update_time: - diff = time - self.last_update_time - self.time += diff - self.durations.append(diff) - self.count += 1 - self.last_update_time = None - else: - self.last_update_time = time + start_time = self._active_tokens.pop(key, None) + if start_time is None: + self._active_tokens[key] = time + return key + + diff = (time - start_time) * MS_IN_SECOND + self.time += diff + self.durations.append(diff) + self.count += 1 + return key def reset(self) -> None: """ @@ -57,7 +72,7 @@ def reset(self) -> None: self.time = 0.0 self.durations = [] self.count = 0 - self.last_update_time = None + self._active_tokens.clear() def mean(self) -> float: """ diff --git a/src/model_api/models/model.py b/src/model_api/models/model.py index b6bf79c4..15153586 100644 --- a/src/model_api/models/model.py +++ b/src/model_api/models/model.py @@ -553,11 +553,13 @@ def infer_async(self, input_data: dict, user_data: Any): "The model is not loaded to the device. Please, create the wrapper " "with preload=True option or call load() method before infer_async()", ) - self.perf.total_time.update() + total_token = object() + inference_token = object() + self.perf.total_time.update(total_token) self.perf.preprocess_time.update() dict_data, meta = self.preprocess(input_data) self.perf.preprocess_time.update() - self.perf.inference_time.update() + self.perf.inference_time.update(inference_token) self.inference_adapter.infer_async( dict_data, ( @@ -567,6 +569,8 @@ def infer_async(self, input_data: dict, user_data: Any): self.postprocess, self.callback_fn, user_data, + total_token, + inference_token, ), ) @@ -575,13 +579,33 @@ def _process_callback(request, callback_data: Any): """ A wrapper for async inference callback. """ - model, meta, get_result_fn, postprocess_fn, callback_fn, user_data = callback_data + total_token = None + inference_token = None + if len(callback_data) >= 8: + ( + model, + meta, + get_result_fn, + postprocess_fn, + callback_fn, + user_data, + total_token, + inference_token, + ) = callback_data + else: + model, meta, get_result_fn, postprocess_fn, callback_fn, user_data = callback_data raw_result = get_result_fn(request) - model.perf.inference_time.update() + if inference_token is not None: + model.perf.inference_time.update(inference_token) + else: + model.perf.inference_time.update() model.perf.postprocess_time.update() result = postprocess_fn(raw_result, meta) model.perf.postprocess_time.update() - model.perf.total_time.update() + if total_token is not None: + model.perf.total_time.update(total_token) + else: + model.perf.total_time.update() callback_fn(result, user_data) def set_callback(self, callback_fn: Callable): diff --git a/tests/unit/metrics/test_performancemetrics.py b/tests/unit/metrics/test_performancemetrics.py index b95c6bf7..decef9dd 100644 --- a/tests/unit/metrics/test_performancemetrics.py +++ b/tests/unit/metrics/test_performancemetrics.py @@ -64,6 +64,16 @@ def test_reset(self): assert self.metrics.postprocess_time.durations == [] assert self.metrics.total_time.durations == [] + def test_reset_including_load_time(self): + """Test reset clears load time when requested.""" + self.metrics.load_time.time = 5.0 + self.metrics.load_time.durations = [5.0] + + self.metrics.reset(include_load_time=True) + + assert self.metrics.load_time.time == 0.0 + assert self.metrics.load_time.durations == [] + def test_get_load_time(self): """Test get_load_time method returns the correct TimeStat object.""" self.metrics.load_time.time = 1.23 @@ -96,6 +106,11 @@ def test_get_postprocess_time(self): assert isinstance(postprocess_time, TimeStat) assert postprocess_time.time == 4.56 + def test_get_total_time(self): + """Test get_total_time returns the total TimeStat object.""" + total_time = self.metrics.get_total_time() + assert total_time is self.metrics.total_time + def test_get_total_frames_empty(self): """Test get_total_frames returns 0 when no frames processed.""" assert self.metrics.get_total_frames() == 0 @@ -112,7 +127,8 @@ def test_get_fps_no_data(self): def test_get_fps_with_data(self): """Test get_fps calculates correctly when frames are processed.""" self.metrics.total_time.durations = [1.0, 2.0, 3.0] - expected_fps = 3 / 6.0 + self.metrics.total_time.time = 6.0 + expected_fps = 3 / (6.0 / 1000.0) assert abs(self.metrics.get_fps() - expected_fps) < 1e-7 def test_get_fps_zero_total_time(self): @@ -142,6 +158,10 @@ def test_add_valid_metrics(self): metrics2.inference_time.durations = [2.5] metrics2.postprocess_time.time = 3.5 metrics2.postprocess_time.durations = [3.5] + metrics1.total_time.time = 6.0 + metrics1.total_time.durations = [6.0] + metrics2.total_time.time = 4.0 + metrics2.total_time.durations = [4.0] result = metrics1 + metrics2 @@ -150,10 +170,12 @@ def test_add_valid_metrics(self): assert result.preprocess_time.time == 3.5 assert result.inference_time.time == 5.5 assert result.postprocess_time.time == 7.5 + assert result.total_time.time == 10.0 assert result.load_time.durations == [1.0, 0.5] assert result.preprocess_time.durations == [2.0, 1.5] assert result.inference_time.durations == [3.0, 2.5] assert result.postprocess_time.durations == [4.0, 3.5] + assert result.total_time.durations == [6.0, 4.0] def test_add_invalid_type(self): """Test adding PerformanceMetrics with invalid type returns NotImplemented.""" @@ -166,21 +188,6 @@ def test_add_invalid_type(self): result = self.metrics.__add__(None) assert result == NotImplemented - def test_add_missing_total_time_in_result(self): - """Test that addition doesn't include total_time in the result.""" - metrics1 = PerformanceMetrics() - metrics2 = PerformanceMetrics() - - metrics1.total_time.time = 10.0 - metrics1.total_time.durations = [10.0] - metrics2.total_time.time = 5.0 - metrics2.total_time.durations = [5.0] - - result = metrics1 + metrics2 - - assert result.total_time.time == 0.0 - assert result.total_time.durations == [] - @patch("model_api.metrics.performance.logger") def test_log_metrics_empty(self, mock_logger): """Test log_metrics with empty metrics.""" @@ -193,13 +200,13 @@ def test_log_metrics_empty(self, mock_logger): logged_content = mock_logger.info.call_args[0][0] assert "🚀 PERFORMANCE METRICS REPORT 🚀" in logged_content - assert "Load Time: 0.000s" in logged_content - assert "Preprocess: 0.000s ± 0.000s" in logged_content - assert "Inference: 0.000s ± 0.000s" in logged_content - assert "Postprocess: 0.000s ± 0.000s" in logged_content - assert "Mean: 0.000s ± 0.000s" in logged_content - assert "Min: 0.000s" in logged_content - assert "Max: 0.000s" in logged_content + assert "Load Time: 0.00 ms" in logged_content + assert "Preprocess: 0.00 ms ± 0.00 ms" in logged_content + assert "Inference: 0.00 ms ± 0.00 ms" in logged_content + assert "Postprocess: 0.00 ms ± 0.00 ms" in logged_content + assert "Mean: 0.00 ms ± 0.00 ms" in logged_content + assert "Min: 0.00 ms" in logged_content + assert "Max: 0.00 ms" in logged_content assert "Total Frames: 0" in logged_content assert "FPS: 0.00" in logged_content @@ -227,13 +234,13 @@ def test_log_metrics_with_data(self, mock_logger): logged_content = mock_logger.info.call_args[0][0] assert "🚀 PERFORMANCE METRICS REPORT 🚀" in logged_content - assert "Load Time: 1.234s" in logged_content - assert "Preprocess: 2.345s ± 0.123s" in logged_content - assert "Inference: 3.456s ± 0.234s" in logged_content - assert "Postprocess: 4.567s ± 0.345s" in logged_content - assert "Mean: 10.123s ± 0.456s" in logged_content - assert "Min: 1.000s" in logged_content - assert "Max: 3.000s" in logged_content + assert "Load Time: 1.23 ms" in logged_content + assert "Preprocess: 2.35 ms ± 0.12 ms" in logged_content + assert "Inference: 3.46 ms ± 0.23 ms" in logged_content + assert "Postprocess: 4.57 ms ± 0.34 ms" in logged_content # 0.345 rounds to 0.34 + assert "Mean: 10.12 ms ± 0.46 ms" in logged_content + assert "Min: 1.00 ms" in logged_content + assert "Max: 3.00 ms" in logged_content assert "Total Frames: 3" in logged_content assert "FPS: 12.34" in logged_content