97 | 97 | observer_or_fake_quant_ctr=MinMaxObserver, |
98 | 98 | ) |
99 | 99 |
| 100 | +wgt_qspec_sym8s_127 = QuantizationSpec( |
| 101 | + dtype=torch.int8, |
| 102 | + quant_min=-127, |
| 103 | + quant_max=127, |
| 104 | + qscheme=torch.per_tensor_symmetric, |
| 105 | + is_dynamic=False, |
| 106 | + observer_or_fake_quant_ctr=MinMaxObserver, |
| 107 | +) |
| 108 | + |
100 | 109 | bias_qspec: Optional[QuantizationSpec] = None |
101 | 110 |
102 | 111 | qconfig_A8W8 = QuantizationConfig( |
161 | 170 | None, |
162 | 171 | ) |
163 | 172 |
164 | | -qconfig_A32W8sym = QuantizationConfig( |
| 173 | +qconfig_A32W8sym_127 = QuantizationConfig( |
165 | 174 | input_activation=None, |
166 | 175 | output_activation=None, |
167 | | - weight=wgt_qspec_sym8s, |
168 | | - bias=wgt_qspec_sym8s, |
| 176 | + weight=wgt_qspec_sym8s_127, |
| 177 | + bias=wgt_qspec_sym8s_127, |
169 | 178 | ) |
170 | 179 |
171 | 180 |
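
Note (not part of the diff): the new `wgt_qspec_sym8s_127` narrows the symmetric int8 weight range from [-128, 127] to [-127, 127], so the quantization grid is exactly symmetric around zero and the unused -128 bin is dropped. A minimal sketch of the effect on the scale derived by `MinMaxObserver`, assuming a PyTorch build that accepts a customized quant range; the toy tensor `w` and helper `symmetric_scale()` are illustrative only:

```python
# Hedged sketch: how quant_min=-127 vs. -128 changes the symmetric scale.
# Only MinMaxObserver from core PyTorch is used here.
import torch
from torch.ao.quantization.observer import MinMaxObserver

w = torch.randn(64, 32)  # stand-in for a weight tensor

def symmetric_scale(quant_min: int, quant_max: int) -> float:
    obs = MinMaxObserver(
        dtype=torch.qint8,
        qscheme=torch.per_tensor_symmetric,
        quant_min=quant_min,
        quant_max=quant_max,
    )
    obs(w)  # record min/max statistics
    scale, zero_point = obs.calculate_qparams()
    assert int(zero_point) == 0  # symmetric signed int8 keeps zero_point at 0
    return float(scale)

# With [-128, 127] the divisor is (127 - (-128)) / 2 = 127.5; with [-127, 127]
# it is 127, so max|w| maps exactly onto +/-127 and the grid is mirror-symmetric.
print(symmetric_scale(-128, 127), symmetric_scale(-127, 127))
```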
@@ -417,13 +426,13 @@ class CadenceW8A32MixedQuantizer(CadenceQuantizer): |
417 | 426 | def __init__(self) -> None: |
418 | 427 | quantizers = [] |
419 | 428 | quantizers.append( |
420 | | - CadenceAtenQuantizer(MixedW8A32LinearPattern(), qconfig_A32W8sym) |
| 429 | + CadenceAtenQuantizer(MixedW8A32LinearPattern(), qconfig_A32W8sym_127) |
421 | 430 | ) |
422 | 431 | quantizers.append( |
423 | | - CadenceAtenQuantizer(MixedW8A32ConvPattern(), qconfig_A32W8sym) |
| 432 | + CadenceAtenQuantizer(MixedW8A32ConvPattern(), qconfig_A32W8sym_127) |
424 | 433 | ) |
425 | 434 | quantizers.append( |
426 | | - CadenceAtenQuantizer(MixedW8A32GruPattern(), qconfig_A32W8sym) |
| 435 | + CadenceAtenQuantizer(MixedW8A32GruPattern(), qconfig_A32W8sym_127) |
427 | 436 | ) |
428 | 437 | super().__init__(quantizers) |
429 | 438 |
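
Usage sketch (an assumption, not part of this commit): from the caller's side the renamed `qconfig_A32W8sym_127` is internal to `CadenceW8A32MixedQuantizer`, so the usual PT2E flow is unchanged. Import paths, the export helper used, and the toy model below are assumptions and may differ between ExecuTorch/PyTorch releases:

```python
# Hedged sketch of applying the mixed W8A32 quantizer through the PT2E flow.
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from executorch.backends.cadence.aot.quantizer.quantizer import (
    CadenceW8A32MixedQuantizer,
)

model = torch.nn.Linear(32, 16).eval()
example_inputs = (torch.randn(1, 32),)

# Export to an ATen-level graph, annotate it, calibrate, then convert.
# (Older releases used capture_pre_autograd_graph / export_for_training here.)
gm = torch.export.export(model, example_inputs).module()
prepared = prepare_pt2e(gm, CadenceW8A32MixedQuantizer())
prepared(*example_inputs)   # one or more calibration passes
quantized = convert_pt2e(prepared)
```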