We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 011a2a6 commit fac8927 Copy full SHA for fac8927
examples/llm_ptq/hf_ptq.py
@@ -966,13 +966,7 @@ def quantize_main(
966
if mtp_layer_prefixes:
967
quant_cfg = copy.deepcopy(quant_cfg)
968
for prefix in mtp_layer_prefixes:
969
- parts = prefix.split(".")
970
- if len(parts) >= 2:
971
- # Multi-component prefix (e.g., "mtp.layers.0" -> "*layers.0*")
972
- pattern = f"*{parts[-2]}.{parts[-1]}*"
973
- else:
974
- # Single-component prefix (e.g., "mtp" -> "*mtp*")
975
- pattern = f"*{prefix}*"
+ pattern = f"*{prefix}*"
976
quant_cfg["quant_cfg"][pattern] = {"enable": False}
977
print(f"Excluding MTP layer from quantization: {pattern}")
978
0 commit comments