Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fms_mo/prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ def qmodel_prep(
model, device_ids=DPorDDPdevices
)

qconfig_save(qcfg, "qcfg.json")
qconfig_save(qcfg, fname="qcfg.json")
qcfg["tb_writer"] = tb_writer

logger.info(f"--- Quantized model --- \n{model}\n")
Expand Down
2 changes: 1 addition & 1 deletion fms_mo/quant/ptq.py
Original file line number Diff line number Diff line change
Expand Up @@ -2537,7 +2537,7 @@ def dq_llm(model, scale, qcfg):

for name, module in model.named_modules():
if isinstance(module, (QLinear,)):
if any(x in name for x in qcfg["scale_layers"]):
if any(x in name for x in qcfg["smoothq_scale_layers"]):
module.set_act_scale(scale[name])
logger.info(
f"Apply layer {name} with activation scales (10)"
Expand Down
2 changes: 1 addition & 1 deletion fms_mo/recipes/dq.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"decoder_arch": true,
"align_zero": true,
"qgroup": null,
"act_scale_path": null,
"smoothq_act_scale_path": null,
"qmodel_calibration_new": 10,
"qskip_large_mag_layers": true,
"ptq_nbatch": 128,
Expand Down
2 changes: 1 addition & 1 deletion fms_mo/utils/aiu_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def save_for_aiu(
"qa_mode",
"qw_mode",
"smoothq",
"scale_layers",
"smoothq_scale_layers",
"qskip_layer_name",
"qskip_large_mag_layers",
"recompute_narrow_weights",
Expand Down
26 changes: 13 additions & 13 deletions fms_mo/utils/dq_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ def config_quantize_smooth_layers(qcfg: dict):
"""Update qcfg with model-dependent config parameters:
- qlayer_name_pattern: identifier of transformer layers containing linear layers
to quantize (if any, tracing is bypassed)
- scale_layers: identifier of linear layers to apply smoothquant on
- qskip_layer_name: full name of linear layers that will not be quantized
- act_scale_path: path to save/load smoothquant activation scales
- smoothq_scale_layers: identifier of linear layers to apply smoothquant on
- smoothq_act_scale_path: path to save/load smoothquant activation scales

Selected model is determined by comparing all architecture identifiers against
`model` and `model_type` fields in qcfg.
Expand Down Expand Up @@ -56,7 +56,7 @@ def config_quantize_smooth_layers(qcfg: dict):
model in qcfg["model_type"] for model in llama_architecture
):
qcfg["qlayer_name_pattern"] = ["model.layers."]
qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
if qcfg["qskip_large_mag_layers"]:
large_mag_layers = {
"2-7b": [1, 30],
Expand All @@ -75,13 +75,13 @@ def config_quantize_smooth_layers(qcfg: dict):
model in qcfg["model_type"] for model in granite_architecture
):
qcfg["qlayer_name_pattern"] = ["model.layers."]
qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
# NOTE: supported granite-v3 models do not need layer skip for large magnitude
elif "mixtral" in qcfg["model"]:
qcfg["qlayer_name_pattern"] = (
["model.layers"] if qcfg["nbits_bmm1"] == 32 else []
)
qcfg["scale_layers"] = ["q_proj", "k_proj", "v_proj", "w1", "w3"]
qcfg["smoothq_scale_layers"] = ["q_proj", "k_proj", "v_proj", "w1", "w3"]
qcfg["qskip_layer_name"] += [
f"model.layers.{i}.block_sparse_moe.gate" for i in range(32)
]
Expand All @@ -98,22 +98,22 @@ def config_quantize_smooth_layers(qcfg: dict):
[31, 7],
]
]
qcfg["act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
qcfg["smoothq_act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
elif any(model in qcfg["model"] for model in bigcode_architecture):
qcfg["qlayer_name_pattern"] = ["transformer.h"]
qcfg["scale_layers"] = ["c_attn", "c_fc"]
qcfg["smoothq_scale_layers"] = ["c_attn", "c_fc"]
# NOTE: supported bigcode models do not need layer skip for large magnitude
if "granite-3b-base-v2" in qcfg["model"]:
qcfg["act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
qcfg["smoothq_act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
if "granite-13b-base-v2" in qcfg["model"]:
qcfg["act_scale_path"] = "./act_scales/granite_13b_base_v2.pt"
qcfg["smoothq_act_scale_path"] = "./act_scales/granite_13b_base_v2.pt"
if "granite-20b-code-base" in qcfg["model"]:
qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
if "granite-20b-code-instruct" in qcfg["model"]:
qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
if "granite-34b-code-base" in qcfg["model"]:
qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
if "granite-34b-code-instruct" in qcfg["model"]:
qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
else:
raise ValueError("The model architecture is not supported for DQ.")
46 changes: 37 additions & 9 deletions fms_mo/utils/qconfig_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from datetime import date
from importlib.metadata import version
from pathlib import Path
from typing import Any
from typing import Any, Union
import json
import logging
import os
Expand Down Expand Up @@ -113,10 +113,10 @@ def config_defaults() -> dict:
"qkvsync": False,
"extend_act_range": False,
"plotsvg": False,
"qskip_large_mag_layers": False,
# Iterable vars
"qlayer_name_pattern": [],
"qskip_layer_name": [],
"qskip_large_mag_layers": False,
"qspecial_layers": {},
"qsinglesided_name": [],
"clip_val_asst_percentile": (0.1, 99.9),
Expand All @@ -142,21 +142,24 @@ def config_defaults() -> dict:
"temp_disable_calib": False,
"org_batch_size": {},
"ptqmod_to_be_optimized": [],
# SmoothQuant vars
"smoothq": False,
"smoothq_scale_layers": [],
"smoothq_act_scale_path": None,
# Other vars
"which2patch_contextmanager": None,
"force_stop_if_qbmm_auto_check_failed": False,
"world_size": max(1, torch.cuda.device_count()),
"global_rank": 0,
"batch_size": 2,
"keys_to_save": [],
# items could be obsoleted
"output_attentions": False,
"bias_corr": False,
"qwav2vec": False,
"qvit": False,
"numparamsfromloadertomodel": 1,
"gradclip": 0.0,
"smoothq": False,
"keys_to_save": [],
}

return cfg_defaults
Expand Down Expand Up @@ -201,7 +204,7 @@ def find_recipe_json(recipe: str, subdir: str = None) -> Path:
return json_file


def get_recipe(recipe: str, subdir: str = None) -> Any:
def get_recipe(recipe: str, subdir: str = None) -> Union[list, dict]:
"""
Get a json recipe.

Expand All @@ -219,6 +222,10 @@ def get_recipe(recipe: str, subdir: str = None) -> Any:
temp_data = json.load(openfile)
logger.info(f"Loaded settings from {json_file}.")

# Any recipe should be a dict (qcfg) or list (keys_to_save)
if not isinstance(temp_data, (dict, list)):
raise ValueError(f"Loaded recipe {json_file} was not a dict or list")

return temp_data


Expand Down Expand Up @@ -378,8 +385,14 @@ def qconfig_init(recipe: str = None, args: Any = None) -> dict:
# this can be used to load a previously saved ckpt as well
if recipe:
# qcfg recipes should reside in fms_mo/recipes
temp_cfg = get_recipe(recipe)
temp_cfg = qconfig_load(recipe)

if temp_cfg:
if not isinstance(temp_cfg, dict):
raise ValueError(
f"Quantized config recipe={recipe} is not a dictionary"
)

qcfg.update(temp_cfg)
logger.info("Updated config with recipe values")
else:
Expand Down Expand Up @@ -562,7 +575,12 @@ def qconfig_save(

# Next, check in fms_mo/recipes and merge them into a unique set (in case they differ)
keys_to_save_json = get_recipe(recipe)

if keys_to_save_json:
if not isinstance(keys_to_save_json, list):
raise ValueError(f"Save recipe={recipe} is not a list!")

# Merge keys_to_save lists
keys_to_save = list(set(keys_to_save + keys_to_save_json))

# If we found keys to save, fetch them from qcfg
Expand Down Expand Up @@ -604,9 +622,12 @@ def qconfig_save(

def qconfig_load(fname: str = "qcfg.json") -> dict:
"""Read config in json format, work together with qconfig_save"""
if os.path.isfile(fname):
with open(fname, "r", encoding="utf-8") as openfile:
config = json.load(openfile)
config = get_recipe(fname)

if config:
# Check that loaded file is a dict
if not isinstance(config, dict):
raise ValueError(f"Quantized config={fname} is not a dictionary")

# Add back wanted defaults for any missing vars
add_wanted_defaults_to_config(config, minimal=False)
Expand Down Expand Up @@ -856,6 +877,8 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
"plotsvg",
"ptq_freezecvs",
"ptq_qdrop",
"qskip_large_mag_layers",
"smoothq",
]
for boolean_var_str in boolean_vars_str:
boolean_var = config.get(
Expand Down Expand Up @@ -912,6 +935,7 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
"firstptqmodule",
"params2optim",
"clip_val_asst_percentile",
"smoothq_scale_layers",
]
for iterable_var_str in iterable_vars_str:
iterable_var_default = default_config.get(iterable_var_str)
Expand Down Expand Up @@ -990,3 +1014,7 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
f"which2patch_contextmanager = {which2patch_contextmanager} is not one of "
f"the following: {which2patch_contextmanager_settings}"
)

smoothq_act_scale_path = config.get("smoothq_act_scale_path", None)
if smoothq_act_scale_path and not smoothq_act_scale_path.endswith(".pt"):
raise ValueError(f"{smoothq_act_scale_path=} is not a .pt checkpoint")
96 changes: 96 additions & 0 deletions tests/models/test_saveconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,102 @@ def test_save_config_minimal(

delete_config()


def test_double_qconfig_save(
    config_fp32: dict,
):
    """
    Ensure that using qconfig_save multiple times doesn't fail.

    Saving a second time on top of an existing qcfg.json is expected to emit
    an overwrite UserWarning. pytest.warns captures that warning and also fails
    the test if no matching warning is raised.

    Args:
        config_fp32 (dict): Config for fp32 quantization
    """
    delete_config()

    try:
        # Creating a qcfg, then saving again will cause an overwrite warning.
        # It is expected here, so it is captured (and required) by pytest.warns.
        with pytest.warns(
            UserWarning, match="qcfg.json already exist, will overwrite."
        ):
            qconfig_save(config_fp32, minimal=False)
            qconfig_save(config_fp32, minimal=False)
    finally:
        # Run cleanup even if the save or the warning check fails, so a
        # leftover config file cannot leak into the following tests.
        delete_config()

def test_qconfig_save_list_as_dict(
    config_fp32: dict,
):
    """
    Test that the "keys_to_save" entry of a config can't be a dictionary

    Args:
        config_fp32 (dict): Config for fp32 quantization
    """
    delete_config()

    # Fill in keys_to_save as dict with nonsense val
    # (key names follow the current smoothq_* naming of the config)
    config_fp32["keys_to_save"] = {
        "qa_mode": None,
        "qw_mode": None,
        "smoothq": None,
        "smoothq_scale_layers": None,
        "qskip_layer_name": None,
        "qskip_large_mag_layers": None,
    }

    try:
        # keys_to_save is expected to be a list, so saving must be rejected
        with pytest.raises(ValueError):
            qconfig_save(config_fp32, minimal=True)
    finally:
        # Run cleanup even if the expected error is not raised
        delete_config()


def test_qconfig_save_recipe_as_dict(
    config_fp32: dict,
):
    """
    Test that a save recipe file holding a dictionary (instead of a list of
    keys) is rejected by qconfig_save

    Args:
        config_fp32 (dict): Config for fp32 quantization
    """
    # Local import keeps this test self-contained
    from pathlib import Path

    recipe_file = "keys_to_save.json"

    delete_config()

    # Write a save recipe as dict with nonsense val
    # (key names follow the current smoothq_* naming of the config)
    save_dict = {
        "qa_mode": None,
        "qw_mode": None,
        "smoothq": None,
        "smoothq_scale_layers": None,
        "qskip_layer_name": None,
        "qskip_large_mag_layers": None,
    }
    save_json(save_dict, file_path=recipe_file)

    try:
        with pytest.raises(ValueError):
            qconfig_save(config_fp32, recipe=recipe_file, minimal=True)
    finally:
        # Remove the temporary recipe so it cannot be picked up by later tests.
        # NOTE(review): assumes save_json writes file_path relative to the
        # current working directory - confirm against save_json.
        Path(recipe_file).unlink(missing_ok=True)
        delete_config()


def test_qconfig_load_with_recipe_as_list(
config_fp32: dict,
):
"""
Test if using qconfig_load errors when loading a json list

Args:
config_fp32 (dict): Config for fp32 quantization
"""
delete_config()

config_list = list( config_fp32.keys() )

save_json(config_list, file_path="qcfg.json")

with pytest.raises(ValueError):
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit (optional): it would be good to also check that this actually fails with the message "Quantized config={fname} is not a dictionary".

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I am missing something, but I set up qconfig_load to accept only a dict from json.load; anything else raises an error. So this check should force an error.

_ = qconfig_load(fname="qcfg.json")

delete_config()


def test_load_config_restored_pair(
config_fp32: dict,
wanted_pair: tuple,
Expand Down
Loading