446446 },
447447 "algorithm" : {
448448 "method" : "local_hessian" ,
449+ "hessian_type" : "local" ,
450+ "fp8_scale_sweep" : True ,
451+ },
452+ }
453+
454+ NVFP4_GLOBAL_HESSIAN_CFG = {  # Preset: block-quantized weights, inputs left unquantized, calibrated with the "global" Hessian variant.
455+ "quant_cfg" : {  # maps quantizer-name glob patterns to their settings
456+ "*weight_quantizer" : {
457+ "num_bits" : (2 , 1 ),  # presumably (exponent, mantissa) bits, i.e. FP4 E2M1 -- TODO confirm
458+ "block_sizes" : {- 1 : 16 , "type" : "static" , "scale_bits" : (4 , 3 )},  # blocks of 16 on axis -1 (same value as LocalHessianCalibConfig.block_size default); scale_bits presumably FP8 E4M3 -- TODO confirm
459+ "axis" : None ,
460+ "enable" : True ,
461+ },
462+ "*input_quantizer" : {
463+ "enable" : False ,  # input quantizers are switched off in this preset
464+ },
465+ ** _default_disabled_quantizer_cfg ,  # defined outside this view; spread last, so any key it shares with the entries above takes its value -- presumably disables other quantizers (verify)
466+ },
467+ "algorithm" : {
468+ "method" : "local_hessian" ,  # method name stays "local_hessian" for both variants; hessian_type selects the variant
469+ "hessian_type" : "global" ,  # per LocalHessianCalibConfig: H = (X * grad^2) @ X.T, which requires a backward pass during calibration
449470 "fp8_scale_sweep" : True ,
450471 },
451472}
@@ -1125,23 +1146,42 @@ class MseCalibConfig(QuantizeAlgorithmConfig):
11251146
11261147
11271148class LocalHessianCalibConfig (QuantizeAlgorithmConfig ):
1128- """Configuration for local Hessian-weighted MSE calibration.
1149+ """Configuration for Hessian-weighted MSE calibration.
11291150
11301151 This algorithm uses activation information to optimize per-block scales for weight
11311152 quantization. It minimizes the output reconstruction error by weighting the loss
1132- with the local Hessian matrix computed from input activations.
1153+ with the Hessian matrix computed from input activations (and optionally output gradients) .
11331154
1134- The local Hessian loss for each block is: ``(dw @ H @ dw.T)`` where:
1155+ The Hessian loss for each block is: ``(dw @ H @ dw.T)`` where:
11351156 - ``dw = weight - quantized_weight`` (weight reconstruction error per block)
1136- - ``H = X @ X.T`` is the local Hessian computed from input activations X
1157+ - ``H`` is the Hessian matrix (local or global, depending on ``hessian_type``)
1158+
1159+ Two Hessian types are supported:
1160+
1161+ - **local**: ``H = X @ X.T`` - uses only input activations. Faster, no backward pass needed.
1162+ - **global**: ``H = (X * grad²) @ X.T`` - weights by output gradient squared.
1163+ More accurate as it accounts for output importance, but requires backward pass.
11371164
11381165 This method is particularly effective for NVFP4 weight-only quantization where
11391166 activation information helps select better per-block scales.
1140-
11411167 """
11421168
11431169 method : Literal ["local_hessian" ] = ModeloptField ("local_hessian" )
11441170
1171+ hessian_type : Literal ["local" , "global" ] = ModeloptField (
1172+ default = "local" ,
1173+ title = "Type of Hessian to compute." ,
1174+ description = """Type of Hessian matrix to use for weighting quantization errors:
1175+
1176+ - ``"local"``: H = X @ X.T - Only uses input activations. Fast, forward-pass only.
1177+ - ``"global"``: H = (X * grad²) @ X.T - Weights by output gradient squared.
1178+ More accurate as it captures output importance, but requires backward pass
1179+ during calibration.
1180+
1181+ The global Hessian is closer to the true Fisher Information and typically
1182+ gives better results, but at the cost of running backward passes.""" ,
1183+ )
1184+
11451185 step_size : float | None = ModeloptField (
11461186 default = 0.1 ,
11471187 gt = 0.0 ,
@@ -1175,8 +1215,8 @@ class LocalHessianCalibConfig(QuantizeAlgorithmConfig):
11751215 block_size : int | None = ModeloptField (
11761216 default = 16 ,
11771217 gt = 0 ,
1178- title = "Block size for local Hessian computation." ,
1179- description = "The block size used for computing the local Hessian matrix. "
1218+ title = "Block size for Hessian computation." ,
1219+ description = "The block size used for computing the Hessian matrix. "
11801220 "This should match the block size used in the quantization config. "
11811221 "Default is 16 for NVFP4." ,
11821222 )
@@ -1190,7 +1230,7 @@ class LocalHessianCalibConfig(QuantizeAlgorithmConfig):
11901230 debug : bool | None = ModeloptField (
11911231 default = False ,
11921232 title = "Debug mode." ,
1193- description = "If True, module's local Hessian metadata will be kept as a module attribute." ,
1233+ description = "If True, module's Hessian metadata will be kept as a module attribute." ,
11941234 )
11951235
11961236
0 commit comments