|
387 | 387 | "algorithm": "max", |
388 | 388 | } |
389 | 389 |
|
# NVFP4 preset that enables both the weight quantizers and the input quantizers
# and selects the "mse" calibration method with an explicit multiplier range.
NVFP4_WEIGHT_ACT_MSE_CFG = {
    "quant_cfg": {
        # Weight quantizers use "static" block scales.
        # NOTE(review): (2, 1) presumably means (exponent, mantissa) bits, i.e.
        # an E2M1 FP4 value, and (4, 3) an E4M3 FP8 scale -- confirm against
        # the quantizer attribute schema.
        "*weight_quantizer": {
            "num_bits": (2, 1),
            "block_sizes": {
                -1: 16,  # presumably blocks of 16 along the last axis -- confirm
                "type": "static",
                "scale_bits": (4, 3),
            },
            "axis": None,
            "enable": True,
        },
        # Input quantizers share the same format but use "dynamic" block scales.
        "*input_quantizer": {
            "num_bits": (2, 1),
            "block_sizes": {
                -1: 16,
                "type": "dynamic",
                "scale_bits": (4, 3),
            },
            "axis": None,
            "enable": True,
        },
        # Unpacked last, so its entries win if a key is repeated above.
        **_default_disabled_quantizer_cfg,
    },
    # Options for the "mse" method; the field meanings are defined by that
    # method's algorithm config, which is not part of this preset.
    "algorithm": {
        "method": "mse",
        "step_size": 0.25,
        "start_multiplier": 0.25,
        "stop_multiplier": 2.0,
    },
}
| 413 | + |
# NVFP4 preset that enables only the weight quantizers (input quantizers are
# switched off) and selects the "mse" method with ``fp8_scale_sweep`` turned on.
NVFP4_WEIGHT_MSE_FP8_SWEEP_CFG = {
    "quant_cfg": {
        # Weight quantizers use "static" block scales.
        # NOTE(review): (2, 1) / (4, 3) presumably denote E2M1 values with
        # E4M3 scales -- confirm against the quantizer attribute schema.
        "*weight_quantizer": {
            "num_bits": (2, 1),
            "block_sizes": {
                -1: 16,  # presumably blocks of 16 along the last axis -- confirm
                "type": "static",
                "scale_bits": (4, 3),
            },
            "axis": None,
            "enable": True,
        },
        # Input quantizers are explicitly disabled in this preset.
        "*input_quantizer": {"enable": False},
        # Unpacked last, so its entries win if a key is repeated above.
        **_default_disabled_quantizer_cfg,
    },
    # No multiplier range is given here; only the sweep flag is set.
    "algorithm": {
        "method": "mse",
        "fp8_scale_sweep": True,
    },
}
| 432 | + |
| 433 | + |
# NVFP4 preset that enables only the weight quantizers (input quantizers are
# switched off) and selects the "local_hessian" calibration method with
# ``fp8_scale_sweep`` turned on.
NVFP4_LOCAL_HESSIAN_CFG = {
    "quant_cfg": {
        # Weight quantizers use "static" block scales.
        # NOTE(review): (2, 1) / (4, 3) presumably denote E2M1 values with
        # E4M3 scales -- confirm against the quantizer attribute schema.
        "*weight_quantizer": {
            "num_bits": (2, 1),
            "block_sizes": {
                -1: 16,  # presumably blocks of 16 along the last axis -- confirm
                "type": "static",
                "scale_bits": (4, 3),
            },
            "axis": None,
            "enable": True,
        },
        # Input quantizers are explicitly disabled in this preset.
        "*input_quantizer": {"enable": False},
        # Unpacked last, so its entries win if a key is repeated above.
        **_default_disabled_quantizer_cfg,
    },
    # Any option not listed here is left unset by this preset.
    "algorithm": {
        "method": "local_hessian",
        "fp8_scale_sweep": True,
    },
}
| 452 | + |
390 | 453 | NVFP4_AWQ_LITE_CFG = { |
391 | 454 | "quant_cfg": { |
392 | 455 | "*weight_quantizer": { |
@@ -1059,6 +1122,76 @@ class MseCalibConfig(QuantizeAlgorithmConfig): |
1059 | 1122 | ) |
1060 | 1123 |
|
1061 | 1124 |
|
class LocalHessianCalibConfig(QuantizeAlgorithmConfig):
    """Options for the ``local_hessian`` calibration algorithm.

    Local Hessian-weighted MSE calibration picks per-block scales for weight
    quantization using activation information. Instead of a plain weight MSE,
    the reconstruction error is weighted by a local Hessian built from the
    input activations, so that the output reconstruction error is minimized.

    For every block the loss is ``dw @ H @ dw.T`` with:

    - ``dw = weight - quantized_weight``, the weight reconstruction error of
      the block;
    - ``H = X @ X.T``, the local Hessian computed from the input activations
      ``X``.

    The method is particularly effective for NVFP4 weight-only quantization,
    where activation information helps select better per-block scales.
    """

    # Discriminator value that selects this algorithm config.
    method: Literal["local_hessian"] = ModeloptField("local_hessian")

    # --- Multiplier-based amax search -------------------------------------
    # Per the ``fp8_scale_sweep`` description below, that sweep is used
    # "instead of using multipliers" when enabled.

    step_size: float | None = ModeloptField(
        title="Step size for amax search.",
        description=(
            "Step size between amax candidates. "
            "The number of candidates is computed as "
            "ceil((stop_multiplier - start_multiplier) / step_size) + 1."
        ),
        default=0.1,
        gt=0.0,
    )

    start_multiplier: float | None = ModeloptField(
        title="Starting multiplier for amax search.",
        description=(
            "Starting multiplier for amax search range (multiplies initial amax)."
        ),
        default=0.25,
        gt=0.0,
    )

    stop_multiplier: float | None = ModeloptField(
        title="Ending multiplier for amax search.",
        description=(
            "Ending multiplier for amax search range (multiplies initial amax)."
        ),
        default=4.0,
        gt=0.0,
    )

    # --- FP8 scale sweep --------------------------------------------------

    fp8_scale_sweep: bool | None = ModeloptField(
        title="Enable FP8 scale sweep for NVFP4 per-block quantization.",
        description=(
            "If True, sweep over all 128 possible FP8 E4M3 scale values for NVFP4 "
            "per-block quantization instead of using multipliers. This is the "
            "recommended setting for NVFP4 quantization."
        ),
        default=True,
    )

    # --- Local Hessian ----------------------------------------------------

    block_size: int | None = ModeloptField(
        title="Block size for local Hessian computation.",
        description=(
            "The block size used for computing the local Hessian matrix. This should "
            "match the block size used in the quantization config. Default is 16 "
            "for NVFP4."
        ),
        default=16,
        gt=0,
    )

    # --- Runtime switches -------------------------------------------------

    distributed_sync: bool | None = ModeloptField(
        title="Whether to sync the amax across the distributed processes.",
        description=(
            "If True, the amax will be synced across the distributed processes."
        ),
        default=True,
    )

    debug: bool | None = ModeloptField(
        title="Debug mode.",
        description=(
            "If True, module's local Hessian metadata will be kept as a module attribute."
        ),
        default=False,
    )
| 1193 | + |
| 1194 | + |
1062 | 1195 | class SmoothQuantCalibConfig(QuantizeAlgorithmConfig): |
1063 | 1196 | """The config for ``smoothquant`` algorithm (SmoothQuant). |
1064 | 1197 |
|
|
0 commit comments