microsoft
diff --git a/‎examples/benchmarks/ort_inference_performance.py‎
Lines changed: 6 additions & 6 deletions b/‎examples/benchmarks/ort_inference_performance.py‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎examples/benchmarks/pytorch_huggingface_models.py‎
Lines changed: 0 additions & 142 deletions b/‎examples/benchmarks/pytorch_huggingface_models.py‎
Lines changed: 0 additions & 142 deletions
diff --git a/‎examples/benchmarks/tensorrt_inference_performance.py‎
Lines changed: 6 additions & 5 deletions b/‎examples/benchmarks/tensorrt_inference_performance.py‎
Lines changed: 6 additions & 5 deletions
diff --git a/‎superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py‎
Lines changed: 12 additions & 4 deletions b/‎superbench/benchmarks/micro_benchmarks/_export_torch_to_onnx.py‎
Lines changed: 12 additions & 4 deletions
diff --git a/‎superbench/benchmarks/micro_benchmarks/huggingface_model_loader.py‎
Lines changed: 11 additions & 26 deletions b/‎superbench/benchmarks/micro_benchmarks/huggingface_model_loader.py‎
Lines changed: 11 additions & 26 deletions
diff --git a/‎superbench/benchmarks/micro_benchmarks/model_source_config.py‎
Lines changed: 4 additions & 13 deletions b/‎superbench/benchmarks/micro_benchmarks/model_source_config.py‎
Lines changed: 4 additions & 13 deletions
@@ -58,9 +58,7 @@ def run_huggingface_benchmark(model_identifier, precision='float16', batch_size=
 
     logger.info(f'Running ORT inference benchmark with HuggingFace model: {model_identifier}')
 
-    context = BenchmarkRegistry.create_benchmark_context(
-        'ort-inference', platform=Platform.CUDA, parameters=parameters
-    )
+    context = BenchmarkRegistry.create_benchmark_context('ort-inference', platform=Platform.CUDA, parameters=parameters)
     benchmark = BenchmarkRegistry.launch_benchmark(context)
     if benchmark:
         logger.info(
@@ -74,12 +72,14 @@ def run_huggingface_benchmark(model_identifier, precision='float16', batch_size=
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='ORT inference benchmark')
     parser.add_argument(
-        '--model_source', type=str, default='in-house', choices=['in-house', 'huggingface'],
+        '--model_source',
+        type=str,
+        default='in-house',
+        choices=['in-house', 'huggingface'],
         help='Source of the model: in-house (default) or huggingface'
     )
     parser.add_argument(
-        '--model_identifier', type=str, default='bert-base-uncased',
-        help='HuggingFace model identifier'
+        '--model_identifier', type=str, default='bert-base-uncased', help='HuggingFace model identifier'
     )
     parser.add_argument('--precision', type=str, default='float16', choices=['float32', 'float16', 'int8'])
     parser.add_argument('--batch_size', type=int, default=32)
 
@@ -74,12 +74,14 @@ def run_huggingface_benchmark(model_identifier, precision='fp16', batch_size=32,
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='TensorRT inference benchmark')
     parser.add_argument(
-        '--model_source', type=str, default='in-house', choices=['in-house', 'huggingface'],
+        '--model_source',
+        type=str,
+        default='in-house',
+        choices=['in-house', 'huggingface'],
         help='Source of the model: in-house (default) or huggingface'
     )
     parser.add_argument(
-        '--model_identifier', type=str, default='bert-base-uncased',
-        help='HuggingFace model identifier'
+        '--model_identifier', type=str, default='bert-base-uncased', help='HuggingFace model identifier'
     )
     parser.add_argument('--precision', type=str, default='fp16', choices=['fp32', 'fp16', 'int8'])
     parser.add_argument('--batch_size', type=int, default=32)
@@ -89,8 +91,7 @@ def run_huggingface_benchmark(model_identifier, precision='fp16', batch_size=32,
 
     if args.model_source == 'huggingface':
         run_huggingface_benchmark(
-            args.model_identifier, args.precision, args.batch_size,
-            args.seq_length, args.iterations
+            args.model_identifier, args.precision, args.batch_size, args.seq_length, args.iterations
         )
     else:
         run_inhouse_benchmark()
@@ -336,16 +336,24 @@ def forward(self, pixel_values):
                             return outputs[0] if isinstance(outputs, (tuple, list)) else outputs
 
                 wrapped_model = VisionModelWrapper(model)
-                export_args = (dummy_input,)
+                export_args = (dummy_input, )
             else:
                 # NLP models: use input_ids and attention_mask
                 dummy_input = torch.ones((batch_size, seq_length), dtype=torch.int64, device=device)
                 attention_mask = torch.ones((batch_size, seq_length), dtype=torch.int64, device=device)
                 input_names = ['input_ids', 'attention_mask']
                 dynamic_axes = {
-                    'input_ids': {0: 'batch_size', 1: 'seq_length'},
-                    'attention_mask': {0: 'batch_size', 1: 'seq_length'},
-                    'output': {0: 'batch_size'},
+                    'input_ids': {
+                        0: 'batch_size',
+                        1: 'seq_length'
+                    },
+                    'attention_mask': {
+                        0: 'batch_size',
+                        1: 'seq_length'
+                    },
+                    'output': {
+                        0: 'batch_size'
+                    },
                 }
 
                 # Wrapper for NLP models
 
@@ -47,7 +47,6 @@ class HuggingFaceModelLoader:
         cache_dir: Directory to cache downloaded models.
         token: HuggingFace authentication token for private/gated models.
     """
-
     def __init__(self, cache_dir: Optional[str] = None, token: Optional[str] = None):
         """Initialize the HuggingFace model loader.
 
@@ -100,11 +99,7 @@ def load_model(
             dtype = self._get_torch_dtype(torch_dtype) if torch_dtype else None
 
             # Prepare loading kwargs
-            load_kwargs = {
-                'cache_dir': self.cache_dir,
-                'revision': revision,
-                **kwargs
-            }
+            load_kwargs = {'cache_dir': self.cache_dir, 'revision': revision, **kwargs}
 
             # Add token if available
             if self.token:
@@ -117,19 +112,15 @@ def load_model(
             # Load config (use pre-downloaded config if provided)
             if config is None:
                 logger.info('Loading model configuration...')
-                config = AutoConfig.from_pretrained(
-                    model_identifier, trust_remote_code=True, **load_kwargs
-                )
+                config = AutoConfig.from_pretrained(model_identifier, trust_remote_code=True, **load_kwargs)
             else:
                 logger.info('Using pre-downloaded model configuration.')
 
             # Load tokenizer (may fail for some models, that's ok)
             tokenizer = None
             try:
                 logger.info('Loading tokenizer...')
-                tokenizer = AutoTokenizer.from_pretrained(
-                    model_identifier, trust_remote_code=True, **load_kwargs
-                )
+                tokenizer = AutoTokenizer.from_pretrained(model_identifier, trust_remote_code=True, **load_kwargs)
             except Exception as e:
                 logger.warning(f'Could not load tokenizer: {e}. Continuing without tokenizer.')
 
@@ -179,7 +170,9 @@ def load_model(
             raise ModelLoadError(f"Unexpected error loading model '{model_identifier}': {e}") from e
 
     def load_model_from_config(
-        self, config: ModelSourceConfig, device: Optional[str] = None,
+        self,
+        config: ModelSourceConfig,
+        device: Optional[str] = None,
         config_pretrained: Optional[PretrainedConfig] = None,
     ) -> Tuple[PreTrainedModel, PretrainedConfig, AutoTokenizer]:
         """Load a model using ModelSourceConfig.
@@ -197,10 +190,7 @@ def load_model_from_config(
             ModelLoadError: If model loading fails.
         """
         if not config.is_huggingface():
-            raise ValueError(
-                f"Cannot load model with source '{config.source}'. "
-                "Use 'huggingface' source."
-            )
+            raise ValueError(f"Cannot load model with source '{config.source}'. Use 'huggingface' source.")
 
         # Validate config
         is_valid, error = config.validate()
@@ -244,10 +234,7 @@ def _get_torch_dtype(self, dtype_str: str) -> torch.dtype:
         }
 
         if dtype_str.lower() not in dtype_map:
-            raise ValueError(
-                f"Invalid dtype '{dtype_str}'. "
-                f'Must be one of {list(dtype_map.keys())}'
-            )
+            raise ValueError(f"Invalid dtype '{dtype_str}'. Must be one of {list(dtype_map.keys())}")
 
         return dtype_map[dtype_str.lower()]
 
@@ -289,9 +276,7 @@ def estimate_param_count_from_config(hf_config) -> Optional[int]:
 
             # Embeddings: token + (optional) position
             max_pos = getattr(hf_config, 'max_position_embeddings', 0)
-            has_pos_embed = getattr(hf_config, 'position_embedding_type', None) not in (
-                'rotary', None
-            )
+            has_pos_embed = getattr(hf_config, 'position_embedding_type', None) not in ('rotary', None)
             embed_params = vocab * hidden
             if has_pos_embed and max_pos > 0:
                 embed_params += max_pos * hidden
@@ -346,7 +331,7 @@ def estimate_memory(param_count, precision_str, mode='training'):
         precision_lower = precision_str.lower()
         if precision_lower in ('float16', 'fp16', 'bfloat16', 'bf16'):
             bytes_per_param = 2
-        elif precision_lower in ('int8',):
+        elif precision_lower in ('int8', ):
             bytes_per_param = 1
         else:
             bytes_per_param = 4
@@ -368,7 +353,7 @@ def estimate_memory(param_count, precision_str, mode='training'):
             except ImportError:
                 logger.warning('psutil not installed — cannot check system memory. Skipping memory check.')
                 return 0, 0, True
-            max_gpu_mem = 80 * (1024 ** 3)  # 80GB — largest common single-GPU memory
+            max_gpu_mem = 80 * (1024**3)    # 80GB — largest common single-GPU memory
             effective_mem = min(sys_mem, max_gpu_mem)
             fits = (estimated_bytes / effective_mem) < 0.85
             return estimated_bytes, effective_mem, fits
 
@@ -33,7 +33,7 @@ class ModelSourceConfig:
     revision: Optional[str] = None
     cache_dir: Optional[str] = None
     device_map: Optional[str] = None
-    use_auth_token: Optional[str] = None  # Deprecated
+    use_auth_token: Optional[str] = None    # Deprecated
     additional_kwargs: Dict[str, Any] = field(default_factory=dict)
 
     def __post_init__(self):
@@ -45,18 +45,12 @@ def __post_init__(self):
         # Normalize and validate source
         self.source = self.source.lower()
         if self.source not in ['in-house', 'huggingface']:
-            raise ValueError(
-                f"Invalid model source '{self.source}'. "
-                f"Must be 'in-house' or 'huggingface'."
-            )
+            raise ValueError(f"Invalid model source '{self.source}'. Must be 'in-house' or 'huggingface'.")
 
         # Validate torch_dtype
         valid_dtypes = ['float32', 'float16', 'bfloat16', 'int8']
         if self.torch_dtype not in valid_dtypes:
-            raise ValueError(
-                f"Invalid torch_dtype '{self.torch_dtype}'. "
-                f'Must be one of {valid_dtypes}.'
-            )
+            raise ValueError(f"Invalid torch_dtype '{self.torch_dtype}'. Must be one of {valid_dtypes}.")
 
         # Validate identifier is provided
         if not self.identifier:
@@ -72,10 +66,7 @@ def validate(self) -> Tuple[bool, str]:
         # Check identifier is not empty for HuggingFace models
         if self.source == 'huggingface':
             if not self.identifier or not self.identifier.strip():
-                return (
-                    False,
-                    'HuggingFace model identifier cannot be empty'
-                )
+                return (False, 'HuggingFace model identifier cannot be empty')
 
         return (True, '')