
Commit d562ac2

Run pre-commit

1 parent a3d2ba0

20 files changed: 88 additions & 93 deletions
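The hunks below are what a hygiene pass typically produces: in most of them the removed and added lines look identical because only trailing whitespace changed, and several files simply gain or lose a final blank line. A minimal sketch of a `.pre-commit-config.yaml` that would produce exactly this kind of diff, assuming the standard pre-commit-hooks repo; the repository's actual hook configuration is not shown on this page:

```yaml
# Hypothetical config; the repo's real .pre-commit-config.yaml is not part of this diff.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace   # strips trailing spaces (hunks where the -/+ lines look identical)
      - id: end-of-file-fixer     # normalizes the final newline (hunks adding or removing a last blank line)
```

Running `pre-commit run --all-files` with such a config applies the fixes across the whole tree in one commit, which matches the multi-file spread seen here.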


microsoft-Phi-4-reasoning/QAIRT/README.md

Lines changed: 0 additions & 1 deletion
@@ -55,4 +55,3 @@ olive run --config htp_sc8480xp.json
 ```
 
 ## Execution Instructions
-
microsoft-Phi-4-reasoning/QAIRT/config/mixed_precision_config/exceptions.json

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@
         }
       ]
     }
-  },
+  },
   {
     "module_name": "\\w*v_proj_(MatMul|conv_Conv|conv2d_Conv|Conv)(\\.base_layer)?",
     "exceptions": {

microsoft-Phi-4-reasoning/QAIRT/genai_lib/common/debug/profiler.py

Lines changed: 1 addition & 1 deletion
@@ -347,4 +347,4 @@ def generate_event_report(event_list: List[Dict[str, Union[int, str]]], max_memo
     os.path.abspath(args.profiling_log),
     time.ctime(os.path.getmtime(args.profiling_log)),
     generate_event_report(events, args.max_memory_threshold)
-))
+))

microsoft-Phi-4-reasoning/QAIRT/genai_lib/common/dev/model_adaptation/linear_to_conv.py

Lines changed: 1 addition & 1 deletion
@@ -71,4 +71,4 @@ def replace_linears_with_convs(model: torch.nn.Module) -> torch.nn.Module:
         conv_layer = ConvInplaceLinear(module)
         rsetattr(model, name, conv_layer)
 
-    return model
+    return model
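For context, the function touched here, `replace_linears_with_convs`, swaps each `nn.Linear` for a `ConvInplaceLinear`, a common adaptation because 1x1 convolutions map more efficiently onto the HTP backend than matmuls. The wrapper's definition is not part of this diff; a minimal sketch of the idea, with `ConvInplaceLinearSketch` as a hypothetical stand-in:

```python
import torch
import torch.nn as nn

class ConvInplaceLinearSketch(nn.Module):
    """Hypothetical stand-in for ConvInplaceLinear: a Linear re-expressed as a 1x1 Conv2d."""
    def __init__(self, linear: nn.Linear):
        super().__init__()
        self.conv = nn.Conv2d(linear.in_features, linear.out_features,
                              kernel_size=1, bias=linear.bias is not None)
        # A Linear weight [out, in] becomes a Conv2d weight [out, in, 1, 1].
        self.conv.weight.data.copy_(linear.weight.data[:, :, None, None])
        if linear.bias is not None:
            self.conv.bias.data.copy_(linear.bias.data)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # [batch, seq, in] -> [batch, in, 1, seq] -> conv -> back to [batch, seq, out]
        y = self.conv(x.transpose(1, 2).unsqueeze(2))
        return y.squeeze(2).transpose(1, 2)

# Quick parity check against the original Linear.
lin = nn.Linear(8, 16)
x = torch.randn(2, 5, 8)
assert torch.allclose(ConvInplaceLinearSketch(lin)(x), lin(x), atol=1e-5)
```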

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/dev/model_adaptation/phi/adaptation.py

Lines changed: 7 additions & 7 deletions
@@ -29,7 +29,7 @@
 # limitations under the License.
 # =============================================================================
 
-""" This file provides adaptations to the Phi3 model. These adaptations are being done to
+""" This file provides adaptations to the Phi3 model. These adaptations are being done to
 optimize the model execution on the HTP backend.
 https://github.com/huggingface/transformers/blob/main/src/transformers/models/phi3/modeling_phi3.py"""
 
@@ -97,7 +97,7 @@ def __init__(self, config: Phi3Config, layer_idx: int):
         if getattr(config, "anchor_alpha", None) is not None:
             self.anchor_updater = AnchorUpdaterKeySecond(alpha=config.anchor_alpha)
 
-
+
 
     """Multi-headed attention from 'Attention Is All You Need' paper"""
     def unpack_qkv(self):
@@ -117,7 +117,7 @@ def unpack_qkv(self):
         self.q_proj.weight.data.copy_(self.qkv_proj.weight[:total_hidden_size, :])
         self.k_proj.weight.data.copy_(self.qkv_proj.weight[total_hidden_size: total_hidden_size + key_value_size, :])
         self.v_proj.weight.data.copy_(self.qkv_proj.weight[total_hidden_size + key_value_size:, :])
-
+
     def forward(
         self,
         hidden_states: torch.Tensor,
@@ -346,7 +346,7 @@ def forward(
         cache_index: Optional[torch.Tensor]=None,
         **kwargs,
     ) -> Union[Tuple, CausalLMOutputWithPast]:
-
+
         logits_to_keep = logits_to_keep if logits_to_keep else getattr(self.config, "logits_to_keep", 0)
 
         if cache_index is not None:
@@ -373,7 +373,7 @@ def forward(
             valid_token_mask=valid_token_mask,
             anchor_buffer=anchor_buffer,
             **kwargs)
-
+
         if version('transformers') >= '4.48.0':
             if return_dict:
                 assert type(outputs.past_key_values) != tuple
@@ -436,7 +436,7 @@ def DynamicCache_to_legacy_cache(self):
     if "anchor_buffer" in dir(self):
         return (legacy_cache, self.anchor_buffer)
     return legacy_cache
-
+
 class QcPhi3Model(Phi3Model):
 
     def forward(
@@ -477,4 +477,4 @@ def forward(
     else:
         raise ValueError(f"Model output is expected to be an instance of BaseModelOutputWithPast or Tuple, got {type(outputs)}")
 
-    return outputs
+    return outputs
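The `unpack_qkv` hunk above documents how the fused `qkv_proj` weight is split by row: the first `total_hidden_size` rows are Q, the next `key_value_size` rows are K, and the rest are V. A small sketch of that arithmetic with made-up Phi-3-style dimensions (the real sizes come from the model config, not this diff):

```python
import torch

# Hypothetical small dimensions, just to exercise the slicing from unpack_qkv.
num_heads, num_kv_heads, head_dim, hidden = 4, 2, 8, 32
total_hidden_size = num_heads * head_dim      # 32 rows for Q
key_value_size = num_kv_heads * head_dim      # 16 rows each for K and V

qkv_weight = torch.randn(total_hidden_size + 2 * key_value_size, hidden)

# Same row slices as in the diff above:
q_w = qkv_weight[:total_hidden_size, :]
k_w = qkv_weight[total_hidden_size: total_hidden_size + key_value_size, :]
v_w = qkv_weight[total_hidden_size + key_value_size:, :]

assert q_w.shape == (32, 32) and k_w.shape == (16, 32) and v_w.shape == (16, 32)
```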

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/dev/model_adaptation/phi/utils.py

Lines changed: 0 additions & 1 deletion
@@ -87,4 +87,3 @@ def _get_model(model_id_or_path):
     config.num_hidden_layers = 1
     model = Phi3Model(config)
     return model
-
microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/evaluation_utils.py

Lines changed: 1 addition & 1 deletion
@@ -120,4 +120,4 @@ def llm_evaluate_ppl_with_dataloader(model, dataloader, num_batches=None, model_
         nlls.append(llm_compute_loss_from_logits(outputs, batch["input_ids"]))
         del outputs
     ppl = torch.exp(torch.stack(nlls).mean())
-    return float(ppl)
+    return float(ppl)
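The context lines spell out the perplexity computation: stack the per-batch negative log-likelihoods, average them, and exponentiate. A tiny numeric sketch with made-up NLL values (note this equals true token-level perplexity only when every batch contributes the same number of tokens):

```python
import torch

# Hypothetical per-batch mean NLLs, as llm_compute_loss_from_logits might return them.
nlls = [torch.tensor(2.31), torch.tensor(2.18), torch.tensor(2.45)]

# Perplexity = exp(mean NLL), matching the last line of the hunk above.
ppl = torch.exp(torch.stack(nlls).mean())
print(float(ppl))  # exp(2.3133...) ~= 10.1
```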

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/long_context_utils.py

Lines changed: 9 additions & 9 deletions
@@ -57,13 +57,13 @@ def __init__(self, num_keys):
     def forward(self, keys: tuple, anchor_buffer: tuple):
         """
         inputs:
-            keys: tuple of length config.num_hidden_layers
+            keys: tuple of length config.num_hidden_layers
                 where each item is of shape [bsz, heads, head_dim, context_len]
-
-            anchor: tuple of length config.num_hidden_layers
+
+            anchor: tuple of length config.num_hidden_layers
                 where each item is of shape [bsz, heads, 1, head_dim]
         outputs:
-            score: tuple of length config.num_hidden_layers
+            score: tuple of length config.num_hidden_layers
                 where each item is of shape [bsz, heads, 1, contex_len]
         """
         score = ()
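The docstring above fixes the shapes: per layer, keys are [bsz, heads, head_dim, context_len] and the anchor is [bsz, heads, 1, head_dim], so the [bsz, heads, 1, context_len] score is exactly what a batched matmul of anchor against keys yields. The scorer's body is not in this diff, so treat the matmul as an assumption; a minimal sketch:

```python
import torch

bsz, heads, head_dim, context_len, num_layers = 1, 4, 8, 16, 2
keys = tuple(torch.randn(bsz, heads, head_dim, context_len) for _ in range(num_layers))
anchor_buffer = tuple(torch.randn(bsz, heads, 1, head_dim) for _ in range(num_layers))

score = ()
for k, a in zip(keys, anchor_buffer):
    # [bsz, heads, 1, head_dim] @ [bsz, heads, head_dim, context_len]
    #   -> [bsz, heads, 1, context_len], matching the docstring's output shape.
    score += (a @ k,)

assert score[0].shape == (bsz, heads, 1, context_len)
```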
@@ -76,7 +76,7 @@ def forward(self, keys: tuple, anchor_buffer: tuple):
 
         return score
 
-
+
 def get_scorer_input_output_names(num_hidden_layers):
 
     """
@@ -91,15 +91,15 @@ def _get_names(pfx, sfx, n_layers):
         all = []
         for i in range(n_layers):
             all.append(f'{pfx}_{i}_{sfx}')
-        return all
+        return all
 
     input_names=[]
     input_names += _get_names("keys", "in", num_hidden_layers)
     input_names += _get_names("anchor_buffer", "in", num_hidden_layers)
 
     output_names=[]
     output_names += _get_names("score", "out", num_hidden_layers)
-    return input_names, output_names
+    return input_names, output_names
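As a quick illustration of the naming scheme `_get_names` produces (flat per-layer tensor names of the sort a static-graph export, e.g. ONNX `input_names`/`output_names`, would need):

```python
def _get_names(pfx, sfx, n_layers):
    # Same helper as above, rewritten as a comprehension.
    return [f"{pfx}_{i}_{sfx}" for i in range(n_layers)]

print(_get_names("keys", "in", 2))           # ['keys_0_in', 'keys_1_in']
print(_get_names("anchor_buffer", "in", 2))  # ['anchor_buffer_0_in', 'anchor_buffer_1_in']
print(_get_names("score", "out", 2))         # ['score_0_out', 'score_1_out']
```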
@@ -169,12 +169,12 @@ def llm_compute_scores(scorer, past_key_values, anchor, valid_kv_len=None, pad_t
     for score in scores:
         if valid_kv_len is not None:
             max_values, _ = torch.max(score, dim=3, keepdim=True)
-
+
             if pad_to_left:
                 score[:, :, : ,:-valid_kv_len] = max_values
             else:
                 score[:, :, :, valid_kv_len:] = max_values
-
+
         updated_scores += (score,)
 
     # the assertion ensures that there is parity between the shape of scores and past_kv shape along the sequence dimension.
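The hunk's logic pins every padded KV position to the row's maximum score, so a later lowest-score pass (for example, picking eviction candidates) can only land in the valid region. A standalone sketch of that masking on one hypothetical score tensor:

```python
import torch

# Hypothetical single-layer score [bsz, heads, 1, context_len]: 3 valid of 6 slots.
score = torch.rand(1, 2, 1, 6)
valid_kv_len, pad_to_left = 3, True

max_values, _ = torch.max(score, dim=3, keepdim=True)
if pad_to_left:
    score[:, :, :, :-valid_kv_len] = max_values   # left padding -> first 3 slots
else:
    score[:, :, :, valid_kv_len:] = max_values    # right padding -> last 3 slots

# Padded slots now hold each row's maximum, so an argmin over the context
# dimension can only land inside the valid region.
print(score.argmin(dim=3))
```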

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/static_graph_utils.py

Lines changed: 4 additions & 4 deletions
@@ -64,11 +64,11 @@ def llm_slice_inputs_for_inference(max_input_tokens, model_context_len, input_id
         After 1st iteration: accumulated KV$ = 1
         After 2nd iteration: accumulated KV$ = 4
         After 3rd iteration: accumulated KV$ = 7
-
+
         Now, when sending the last slice of 3, we will either pad it left or right, irrespective of that, the past KV$
         that can flow into the model/ or the KV$ that the current input slice will attend to can only be ctx_len-ARN, hence
         we will only look at 7 which is accumulated accurately until this point.
-
+
         Hence, we can pass ctx_len worth of input chunk into the model without needing any eviction logic here.
         This is the default behavior.
         """
@@ -101,11 +101,11 @@ def llm_slice_inputs_for_inference(max_input_tokens, model_context_len, input_id
         After 1st iteration: accumulated KV$ = 3
         After 2nd iteration: accumulated KV$ = 6
         After 3rd iteration: accumulated KV$ = 9
-
+
         Now, when sending the last slice of 1, we will either pad it left or right, irrespective of that, the past KV$
         that can flow into the model/ or the KV$ that the current input slice will attent to can only be ctx_len-ARN, hence
         we will only look at 7 (instead of 9 KV$) and loose information as we need to evict 2 KV$
-
+
         More importantly, we will have to evict this extra KV$ otherwise we will run into issues.
         """
         for idx in range(0, input_length, max_input_tokens):
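Only the loop header survives in this hunk, but together with the docstrings it implies straightforward chunking: walk input_ids in steps of max_input_tokens while KV$ accumulates across slices. A sketch of just that arithmetic, reproducing the 3 + 3 + 3 + 1 split from the second docstring (padding and eviction, which the real function also handles, are omitted):

```python
# 10 input tokens processed in slices of max_input_tokens = 3, as in the
# second docstring above: 3 + 3 + 3 + 1, with KV$ accumulating between slices.
input_ids = list(range(10))
max_input_tokens = 3

accumulated_kv = 0
for idx in range(0, len(input_ids), max_input_tokens):
    chunk = input_ids[idx: idx + max_input_tokens]
    print(f"slice {chunk}, past KV$ entering this slice = {accumulated_kv}")
    accumulated_kv += len(chunk)
```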

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/utils.py

Lines changed: 0 additions & 1 deletion
@@ -119,4 +119,3 @@ def llm_search_layers_by_type(model, module_type):
         if isinstance(module, module_type):
             embedding_layers.append(module)
     return embedding_layers
-
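The tail of `llm_search_layers_by_type` shown here collects every submodule of a requested type. Only the last lines are visible, so the iteration is assumed to be over `model.modules()`; a self-contained sketch of the same pattern:

```python
import torch.nn as nn

def search_layers_by_type(model: nn.Module, module_type) -> list:
    """Collect every submodule of the given type (same pattern as the hunk above)."""
    found = []
    for module in model.modules():
        if isinstance(module, module_type):
            found.append(module)
    return found

# Example: find the embedding layers of a toy model.
toy = nn.Sequential(nn.Embedding(100, 16), nn.Linear(16, 16))
print(search_layers_by_type(toy, nn.Embedding))  # [Embedding(100, 16)]
```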
