
Commit d562ac2

Run pre-commit

1 parent a3d2ba0

20 files changed: 88 additions & 93 deletions
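The hunks below are what a hygiene pass typically produces: in most of them the removed and added lines look identical because only trailing whitespace changed, and several files simply gain or lose a final blank line. A minimal sketch of a `.pre-commit-config.yaml` that would produce exactly this kind of diff, assuming the standard pre-commit-hooks repo; the repository's actual hook configuration is not shown on this page:

```yaml
# Hypothetical config; the repo's real .pre-commit-config.yaml is not part of this diff.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: trailing-whitespace   # strips trailing spaces (hunks where the -/+ lines look identical)
      - id: end-of-file-fixer     # normalizes the final newline (hunks adding or removing a last blank line)
```

Running `pre-commit run --all-files` with such a config applies the fixes across the whole tree in one commit, which matches the multi-file spread seen here.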


microsoft-Phi-4-reasoning/QAIRT/README.md

Lines changed: 0 additions & 1 deletion
@@ -55,4 +55,3 @@ olive run --config htp_sc8480xp.json
 ```
 
 ## Execution Instructions
-
microsoft-Phi-4-reasoning/QAIRT/config/mixed_precision_config/exceptions.json

Lines changed: 1 addition & 1 deletion
@@ -75,7 +75,7 @@
         }
       ]
     }
-  },
+  },
   {
     "module_name": "\\w*v_proj_(MatMul|conv_Conv|conv2d_Conv|Conv)(\\.base_layer)?",
     "exceptions": {

microsoft-Phi-4-reasoning/QAIRT/genai_lib/common/debug/profiler.py

Lines changed: 1 addition & 1 deletion
@@ -347,4 +347,4 @@ def generate_event_report(event_list: List[Dict[str, Union[int, str]]], max_memo
     os.path.abspath(args.profiling_log),
     time.ctime(os.path.getmtime(args.profiling_log)),
     generate_event_report(events, args.max_memory_threshold)
-))
+))

microsoft-Phi-4-reasoning/QAIRT/genai_lib/common/dev/model_adaptation/linear_to_conv.py

Lines changed: 1 addition & 1 deletion
@@ -71,4 +71,4 @@ def replace_linears_with_convs(model: torch.nn.Module) -> torch.nn.Module:
         conv_layer = ConvInplaceLinear(module)
         rsetattr(model, name, conv_layer)
 
-    return model
+    return model
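For context, the function touched here, `replace_linears_with_convs`, swaps each `nn.Linear` for a `ConvInplaceLinear`, a common adaptation because 1x1 convolutions map more efficiently onto the HTP backend than matmuls. The wrapper's definition is not part of this diff; a minimal sketch of the idea, with `ConvInplaceLinearSketch` as a hypothetical stand-in:

```python
import torch
import torch.nn as nn

class ConvInplaceLinearSketch(nn.Module):
    """Hypothetical stand-in for ConvInplaceLinear: a Linear re-expressed as a 1x1 Conv2d."""
    def __init__(self, linear: nn.Linear):
        super().__init__()
        self.conv = nn.Conv2d(linear.in_features, linear.out_features,
                              kernel_size=1, bias=linear.bias is not None)
        # A Linear weight [out, in] becomes a Conv2d weight [out, in, 1, 1].
        self.conv.weight.data.copy_(linear.weight.data[:, :, None, None])
        if linear.bias is not None:
            self.conv.bias.data.copy_(linear.bias.data)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # [batch, seq, in] -> [batch, in, 1, seq] -> conv -> back to [batch, seq, out]
        y = self.conv(x.transpose(1, 2).unsqueeze(2))
        return y.squeeze(2).transpose(1, 2)

# Quick parity check against the original Linear.
lin = nn.Linear(8, 16)
x = torch.randn(2, 5, 8)
assert torch.allclose(ConvInplaceLinearSketch(lin)(x), lin(x), atol=1e-5)
```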

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/dev/model_adaptation/phi/adaptation.py

Lines changed: 7 additions & 7 deletions
@@ -29,7 +29,7 @@
 # limitations under the License.
 # =============================================================================
 
-""" This file provides adaptations to the Phi3 model. These adaptations are being done to
+""" This file provides adaptations to the Phi3 model. These adaptations are being done to
 optimize the model execution on the HTP backend.
 https://github.com/huggingface/transformers/blob/main/src/transformers/models/phi3/modeling_phi3.py"""
 
@@ -97,7 +97,7 @@ def __init__(self, config: Phi3Config, layer_idx: int):
         if getattr(config, "anchor_alpha", None) is not None:
             self.anchor_updater = AnchorUpdaterKeySecond(alpha=config.anchor_alpha)
 
-
+
 
     """Multi-headed attention from 'Attention Is All You Need' paper"""
     def unpack_qkv(self):
@@ -117,7 +117,7 @@ def unpack_qkv(self):
         self.q_proj.weight.data.copy_(self.qkv_proj.weight[:total_hidden_size, :])
         self.k_proj.weight.data.copy_(self.qkv_proj.weight[total_hidden_size: total_hidden_size + key_value_size, :])
         self.v_proj.weight.data.copy_(self.qkv_proj.weight[total_hidden_size + key_value_size:, :])
-
+
     def forward(
         self,
         hidden_states: torch.Tensor,
@@ -346,7 +346,7 @@ def forward(
         cache_index: Optional[torch.Tensor]=None,
         **kwargs,
     ) -> Union[Tuple, CausalLMOutputWithPast]:
-
+
         logits_to_keep = logits_to_keep if logits_to_keep else getattr(self.config, "logits_to_keep", 0)
 
         if cache_index is not None:
@@ -373,7 +373,7 @@ def forward(
             valid_token_mask=valid_token_mask,
             anchor_buffer=anchor_buffer,
             **kwargs)
-
+
         if version('transformers') >= '4.48.0':
             if return_dict:
                 assert type(outputs.past_key_values) != tuple
@@ -436,7 +436,7 @@ def DynamicCache_to_legacy_cache(self):
     if "anchor_buffer" in dir(self):
         return (legacy_cache, self.anchor_buffer)
     return legacy_cache
-
+
 class QcPhi3Model(Phi3Model):
 
     def forward(
@@ -477,4 +477,4 @@ def forward(
     else:
         raise ValueError(f"Model output is expected to be an instance of BaseModelOutputWithPast or Tuple, got {type(outputs)}")
 
-    return outputs
+    return outputs
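The `unpack_qkv` hunk above documents how the fused `qkv_proj` weight is split by row: the first `total_hidden_size` rows are Q, the next `key_value_size` rows are K, and the rest are V. A small sketch of that arithmetic with made-up Phi-3-style dimensions (the real sizes come from the model config, not this diff):

```python
import torch

# Hypothetical small dimensions, just to exercise the slicing from unpack_qkv.
num_heads, num_kv_heads, head_dim, hidden = 4, 2, 8, 32
total_hidden_size = num_heads * head_dim      # 32 rows for Q
key_value_size = num_kv_heads * head_dim      # 16 rows each for K and V

qkv_weight = torch.randn(total_hidden_size + 2 * key_value_size, hidden)

# Same row slices as in the diff above:
q_w = qkv_weight[:total_hidden_size, :]
k_w = qkv_weight[total_hidden_size: total_hidden_size + key_value_size, :]
v_w = qkv_weight[total_hidden_size + key_value_size:, :]

assert q_w.shape == (32, 32) and k_w.shape == (16, 32) and v_w.shape == (16, 32)
```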

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/dev/model_adaptation/phi/utils.py

Lines changed: 0 additions & 1 deletion
@@ -87,4 +87,3 @@ def _get_model(model_id_or_path):
     config.num_hidden_layers = 1
     model = Phi3Model(config)
     return model
-
microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/evaluation_utils.py

Lines changed: 1 addition & 1 deletion
@@ -120,4 +120,4 @@ def llm_evaluate_ppl_with_dataloader(model, dataloader, num_batches=None, model_
         nlls.append(llm_compute_loss_from_logits(outputs, batch["input_ids"]))
         del outputs
     ppl = torch.exp(torch.stack(nlls).mean())
-    return float(ppl)
+    return float(ppl)
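The context lines spell out the perplexity computation: stack the per-batch negative log-likelihoods, average them, and exponentiate. A tiny numeric sketch with made-up NLL values (note this equals true token-level perplexity only when every batch contributes the same number of tokens):

```python
import torch

# Hypothetical per-batch mean NLLs, as llm_compute_loss_from_logits might return them.
nlls = [torch.tensor(2.31), torch.tensor(2.18), torch.tensor(2.45)]

# Perplexity = exp(mean NLL), matching the last line of the hunk above.
ppl = torch.exp(torch.stack(nlls).mean())
print(float(ppl))  # exp(2.3133...) ~= 10.1
```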

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/long_context_utils.py

Lines changed: 9 additions & 9 deletions
@@ -57,13 +57,13 @@ def __init__(self, num_keys):
     def forward(self, keys: tuple, anchor_buffer: tuple):
         """
         inputs:
-            keys: tuple of length config.num_hidden_layers
+            keys: tuple of length config.num_hidden_layers
                 where each item is of shape [bsz, heads, head_dim, context_len]
-
-            anchor: tuple of length config.num_hidden_layers
+
+            anchor: tuple of length config.num_hidden_layers
                 where each item is of shape [bsz, heads, 1, head_dim]
         outputs:
-            score: tuple of length config.num_hidden_layers
+            score: tuple of length config.num_hidden_layers
                 where each item is of shape [bsz, heads, 1, contex_len]
         """
         score = ()
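The docstring above fixes the shapes: per layer, keys are [bsz, heads, head_dim, context_len] and the anchor is [bsz, heads, 1, head_dim], so the [bsz, heads, 1, context_len] score is exactly what a batched matmul of anchor against keys yields. The scorer's body is not in this diff, so treat the matmul as an assumption; a minimal sketch:

```python
import torch

bsz, heads, head_dim, context_len, num_layers = 1, 4, 8, 16, 2
keys = tuple(torch.randn(bsz, heads, head_dim, context_len) for _ in range(num_layers))
anchor_buffer = tuple(torch.randn(bsz, heads, 1, head_dim) for _ in range(num_layers))

score = ()
for k, a in zip(keys, anchor_buffer):
    # [bsz, heads, 1, head_dim] @ [bsz, heads, head_dim, context_len]
    #   -> [bsz, heads, 1, context_len], matching the docstring's output shape.
    score += (a @ k,)

assert score[0].shape == (bsz, heads, 1, context_len)
```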
@@ -76,7 +76,7 @@ def forward(self, keys: tuple, anchor_buffer: tuple):
 
         return score
 
-
+
 def get_scorer_input_output_names(num_hidden_layers):
 
     """
@@ -91,15 +91,15 @@ def _get_names(pfx, sfx, n_layers):
         all = []
         for i in range(n_layers):
             all.append(f'{pfx}_{i}_{sfx}')
-        return all
+        return all
 
     input_names=[]
     input_names += _get_names("keys", "in", num_hidden_layers)
     input_names += _get_names("anchor_buffer", "in", num_hidden_layers)
 
     output_names=[]
     output_names += _get_names("score", "out", num_hidden_layers)
-    return input_names, output_names
+    return input_names, output_names
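As a quick illustration of the naming scheme `_get_names` produces (flat per-layer tensor names of the sort a static-graph export, e.g. ONNX `input_names`/`output_names`, would need):

```python
def _get_names(pfx, sfx, n_layers):
    # Same helper as above, rewritten as a comprehension.
    return [f"{pfx}_{i}_{sfx}" for i in range(n_layers)]

print(_get_names("keys", "in", 2))           # ['keys_0_in', 'keys_1_in']
print(_get_names("anchor_buffer", "in", 2))  # ['anchor_buffer_0_in', 'anchor_buffer_1_in']
print(_get_names("score", "out", 2))         # ['score_0_out', 'score_1_out']
```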
@@ -169,12 +169,12 @@ def llm_compute_scores(scorer, past_key_values, anchor, valid_kv_len=None, pad_t
     for score in scores:
         if valid_kv_len is not None:
             max_values, _ = torch.max(score, dim=3, keepdim=True)
-
+
             if pad_to_left:
                 score[:, :, : ,:-valid_kv_len] = max_values
             else:
                 score[:, :, :, valid_kv_len:] = max_values
-
+
         updated_scores += (score,)
 
     # the assertion ensures that there is parity between the shape of scores and past_kv shape along the sequence dimension.
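The hunk's logic pins every padded KV position to the row's maximum score, so a later lowest-score pass (for example, picking eviction candidates) can only land in the valid region. A standalone sketch of that masking on one hypothetical score tensor:

```python
import torch

# Hypothetical single-layer score [bsz, heads, 1, context_len]: 3 valid of 6 slots.
score = torch.rand(1, 2, 1, 6)
valid_kv_len, pad_to_left = 3, True

max_values, _ = torch.max(score, dim=3, keepdim=True)
if pad_to_left:
    score[:, :, :, :-valid_kv_len] = max_values   # left padding -> first 3 slots
else:
    score[:, :, :, valid_kv_len:] = max_values    # right padding -> last 3 slots

# Padded slots now hold each row's maximum, so an argmin over the context
# dimension can only land inside the valid region.
print(score.argmin(dim=3))
```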

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/static_graph_utils.py

Lines changed: 4 additions & 4 deletions
@@ -64,11 +64,11 @@ def llm_slice_inputs_for_inference(max_input_tokens, model_context_len, input_id
         After 1st iteration: accumulated KV$ = 1
         After 2nd iteration: accumulated KV$ = 4
         After 3rd iteration: accumulated KV$ = 7
-
+
         Now, when sending the last slice of 3, we will either pad it left or right, irrespective of that, the past KV$
         that can flow into the model/ or the KV$ that the current input slice will attend to can only be ctx_len-ARN, hence
         we will only look at 7 which is accumulated accurately until this point.
-
+
         Hence, we can pass ctx_len worth of input chunk into the model without needing any eviction logic here.
         This is the default behavior.
         """
@@ -101,11 +101,11 @@ def llm_slice_inputs_for_inference(max_input_tokens, model_context_len, input_id
         After 1st iteration: accumulated KV$ = 3
         After 2nd iteration: accumulated KV$ = 6
         After 3rd iteration: accumulated KV$ = 9
-
+
         Now, when sending the last slice of 1, we will either pad it left or right, irrespective of that, the past KV$
         that can flow into the model/ or the KV$ that the current input slice will attent to can only be ctx_len-ARN, hence
         we will only look at 7 (instead of 9 KV$) and loose information as we need to evict 2 KV$
-
+
         More importantly, we will have to evict this extra KV$ otherwise we will run into issues.
         """
         for idx in range(0, input_length, max_input_tokens):
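Only the loop header survives in this hunk, but together with the docstrings it implies straightforward chunking: walk input_ids in steps of max_input_tokens while KV$ accumulates across slices. A sketch of just that arithmetic, reproducing the 3 + 3 + 3 + 1 split from the second docstring (padding and eviction, which the real function also handles, are omitted):

```python
# 10 input tokens processed in slices of max_input_tokens = 3, as in the
# second docstring above: 3 + 3 + 3 + 1, with KV$ accumulating between slices.
input_ids = list(range(10))
max_input_tokens = 3

accumulated_kv = 0
for idx in range(0, len(input_ids), max_input_tokens):
    chunk = input_ids[idx: idx + max_input_tokens]
    print(f"slice {chunk}, past KV$ entering this slice = {accumulated_kv}")
    accumulated_kv += len(chunk)
```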

microsoft-Phi-4-reasoning/QAIRT/genai_lib/llm/utils.py

Lines changed: 0 additions & 1 deletion
@@ -119,4 +119,3 @@ def llm_search_layers_by_type(model, module_type):
         if isinstance(module, module_type):
             embedding_layers.append(module)
     return embedding_layers
-
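The tail of `llm_search_layers_by_type` shown here collects every submodule of a requested type. Only the last lines are visible, so the iteration is assumed to be over `model.modules()`; a self-contained sketch of the same pattern:

```python
import torch.nn as nn

def search_layers_by_type(model: nn.Module, module_type) -> list:
    """Collect every submodule of the given type (same pattern as the hunk above)."""
    found = []
    for module in model.modules():
        if isinstance(module, module_type):
            found.append(module)
    return found

# Example: find the embedding layers of a toy model.
toy = nn.Sequential(nn.Embedding(100, 16), nn.Linear(16, 16))
print(search_layers_by_type(toy, nn.Embedding))  # [Embedding(100, 16)]
```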
