foundation-model-stack · andrea-fasoli · May 10, 2025 · May 7, 2025 · May 7, 2025 · May 8, 2025
@@ -869,7 +869,7 @@ def qmodel_prep(
             model, device_ids=DPorDDPdevices
         )
 
-    qconfig_save(qcfg, "qcfg.json")
+    qconfig_save(qcfg, fname="qcfg.json")
     qcfg["tb_writer"] = tb_writer
 
     logger.info(f"--- Quantized model --- \n{model}\n")

@@ -2537,7 +2537,7 @@ def dq_llm(model, scale, qcfg):
 
     for name, module in model.named_modules():
         if isinstance(module, (QLinear,)):
-            if any(x in name for x in qcfg["scale_layers"]):
+            if any(x in name for x in qcfg["smoothq_scale_layers"]):
                 module.set_act_scale(scale[name])
                 logger.info(
                     f"Apply layer {name} with activation scales (10)"

@@ -2,7 +2,7 @@
     "decoder_arch": true,
     "align_zero": true,
     "qgroup": null,
-    "act_scale_path": null,
+    "smoothq_act_scale_path": null,
     "qmodel_calibration_new": 10,
     "qskip_large_mag_layers": true,
     "ptq_nbatch": 128,

@@ -470,7 +470,7 @@ def save_for_aiu(
         "qa_mode",
         "qw_mode",
         "smoothq",
-        "scale_layers",
+        "smoothq_scale_layers",
         "qskip_layer_name",
         "qskip_large_mag_layers",
         "recompute_narrow_weights",

@@ -18,9 +18,9 @@ def config_quantize_smooth_layers(qcfg: dict):
     """Update qcfg with model-dependent config parameters:
     - qlayer_name_pattern: identifier of transformer layers containing linear layers
     to quantize (if any, tracing is bypassed)
-    - scale_layers: identifier of linear layers to apply smoothquant on
     - qskip_layer_name: full name of linear layers that will not be quantized
-    - act_scale_path: path to save/load smoothquant activation scales
+    - smoothq_scale_layers: identifier of linear layers to apply smoothquant on
+    - smoothq_act_scale_path: path to save/load smoothquant activation scales
 
     Selected model is determined by comparing all architecture identifiers against
     `model` and `model_type` fields in qcfg.
@@ -56,7 +56,7 @@ def config_quantize_smooth_layers(qcfg: dict):
         model in qcfg["model_type"] for model in llama_architecture
     ):
         qcfg["qlayer_name_pattern"] = ["model.layers."]
-        qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
+        qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
         if qcfg["qskip_large_mag_layers"]:
             large_mag_layers = {
                 "2-7b": [1, 30],
@@ -75,13 +75,13 @@ def config_quantize_smooth_layers(qcfg: dict):
         model in qcfg["model_type"] for model in granite_architecture
     ):
         qcfg["qlayer_name_pattern"] = ["model.layers."]
-        qcfg["scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
+        qcfg["smoothq_scale_layers"] = ["k_proj", "v_proj", "gate_proj", "up_proj"]
         # NOTE: supported granite-v3 models do not need layer skip for large magnitude
     elif "mixtral" in qcfg["model"]:
         qcfg["qlayer_name_pattern"] = (
             ["model.layers"] if qcfg["nbits_bmm1"] == 32 else []
         )
-        qcfg["scale_layers"] = ["q_proj", "k_proj", "v_proj", "w1", "w3"]
+        qcfg["smoothq_scale_layers"] = ["q_proj", "k_proj", "v_proj", "w1", "w3"]
         qcfg["qskip_layer_name"] += [
             f"model.layers.{i}.block_sparse_moe.gate" for i in range(32)
         ]
@@ -98,22 +98,22 @@ def config_quantize_smooth_layers(qcfg: dict):
                     [31, 7],
                 ]
             ]
-        qcfg["act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
+        qcfg["smoothq_act_scale_path"] = "./act_scales/Mixtral-8x7B-v0.1.pt"
     elif any(model in qcfg["model"] for model in bigcode_architecture):
         qcfg["qlayer_name_pattern"] = ["transformer.h"]
-        qcfg["scale_layers"] = ["c_attn", "c_fc"]
+        qcfg["smoothq_scale_layers"] = ["c_attn", "c_fc"]
         # NOTE: supported bigcode models do not need layer skip for large magnitude
         if "granite-3b-base-v2" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/granite_3b_base_v2_500_nw.pt"
         if "granite-13b-base-v2" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/granite_13b_base_v2.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/granite_13b_base_v2.pt"
         if "granite-20b-code-base" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
         if "granite-20b-code-instruct" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_20b_base12.pt"
         if "granite-34b-code-base" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
         if "granite-34b-code-instruct" in qcfg["model"]:
-            qcfg["act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
+            qcfg["smoothq_act_scale_path"] = "./act_scales/graniteCodeHF_34b_base12.pt"
     else:
         raise ValueError("The model architecture is not supported for DQ.")
@@ -18,7 +18,7 @@
 from datetime import date
 from importlib.metadata import version
 from pathlib import Path
-from typing import Any
+from typing import Any, Union
 import json
 import logging
 import os
@@ -113,10 +113,10 @@ def config_defaults() -> dict:
         "qkvsync": False,
         "extend_act_range": False,
         "plotsvg": False,
+        "qskip_large_mag_layers": False,
         # Iterable vars
         "qlayer_name_pattern": [],
         "qskip_layer_name": [],
-        "qskip_large_mag_layers": False,
         "qspecial_layers": {},
         "qsinglesided_name": [],
         "clip_val_asst_percentile": (0.1, 99.9),
@@ -142,21 +142,24 @@ def config_defaults() -> dict:
         "temp_disable_calib": False,
         "org_batch_size": {},
         "ptqmod_to_be_optimized": [],
+        # SmoothQuant vars
+        "smoothq": False,
+        "smoothq_scale_layers": [],
+        "smoothq_act_scale_path": None,
         # Other vars
         "which2patch_contextmanager": None,
         "force_stop_if_qbmm_auto_check_failed": False,
         "world_size": max(1, torch.cuda.device_count()),
         "global_rank": 0,
         "batch_size": 2,
+        "keys_to_save": [],
         # items could be obsoleted
         "output_attentions": False,
         "bias_corr": False,
         "qwav2vec": False,
         "qvit": False,
         "numparamsfromloadertomodel": 1,
         "gradclip": 0.0,
-        "smoothq": False,
-        "keys_to_save": [],
     }
 
     return cfg_defaults
@@ -201,7 +204,7 @@ def find_recipe_json(recipe: str, subdir: str = None) -> Path:
     return json_file
 
 
-def get_recipe(recipe: str, subdir: str = None) -> Any:
+def get_recipe(recipe: str, subdir: str = None) -> Union[list, dict]:
     """
     Get a json recipe.
 
@@ -219,6 +222,10 @@ def get_recipe(recipe: str, subdir: str = None) -> Any:
             temp_data = json.load(openfile)
         logger.info(f"Loaded settings from {json_file}.")
 
+        # Any recipe should be a dict (qcfg) or list (keys_to_save)
+        if not isinstance(temp_data, (dict, list)):
+            raise ValueError(f"Loaded recipe {json_file} was not a dict or list")
+
     return temp_data
 
 
@@ -378,8 +385,14 @@ def qconfig_init(recipe: str = None, args: Any = None) -> dict:
     #    this can be used to load a previously saved ckpt as well
     if recipe:
         # qcfg recipes should reside in fms_mo/recipes
-        temp_cfg = get_recipe(recipe)
+        temp_cfg = qconfig_load(recipe)
+
         if temp_cfg:
+            if not isinstance(temp_cfg, dict):
+                raise ValueError(
+                    f"Quantized config recipe={recipe} is not a dictionary"
+                )
+
             qcfg.update(temp_cfg)
             logger.info("Updated config with recipe values")
         else:
@@ -562,7 +575,12 @@ def qconfig_save(
 
     # Next, check in fms_mo/recipes and merge them into a unique set (in case they differ)
     keys_to_save_json = get_recipe(recipe)
+
     if keys_to_save_json:
+        if not isinstance(keys_to_save_json, list):
+            raise ValueError(f"Save recipe={recipe} is not a list!")
+
+        # Merge keys_to_save lists
         keys_to_save = list(set(keys_to_save + keys_to_save_json))
 
     # If we found keys to save, fetch them from qcfg
@@ -604,9 +622,12 @@ def qconfig_save(
 
 def qconfig_load(fname: str = "qcfg.json") -> dict:
     """Read config in json format, work together with qconfig_save"""
-    if os.path.isfile(fname):
-        with open(fname, "r", encoding="utf-8") as openfile:
-            config = json.load(openfile)
+    config = get_recipe(fname)
+
+    if config:
+        # Check that loaded file is a dict
+        if not isinstance(config, dict):
+            raise ValueError(f"Quantized config={fname} is not a dictionary")
 
         # Add back wanted defaults for any missing vars
         add_wanted_defaults_to_config(config, minimal=False)
@@ -856,6 +877,8 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
         "plotsvg",
         "ptq_freezecvs",
         "ptq_qdrop",
+        "qskip_large_mag_layers",
+        "smoothq",
     ]
     for boolean_var_str in boolean_vars_str:
         boolean_var = config.get(
@@ -912,6 +935,7 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
         "firstptqmodule",
         "params2optim",
         "clip_val_asst_percentile",
+        "smoothq_scale_layers",
     ]
     for iterable_var_str in iterable_vars_str:
         iterable_var_default = default_config.get(iterable_var_str)
@@ -990,3 +1014,7 @@ def check_config(config: dict, model_dtype: torch.dtype = None) -> None:
             f"which2patch_contextmanager = {which2patch_contextmanager} is not one of "
             f"the following: {which2patch_contextmanager_settings}"
         )
+
+    smoothq_act_scale_path = config.get("smoothq_act_scale_path", None)
+    if smoothq_act_scale_path and not smoothq_act_scale_path.endswith(".pt"):
+        raise ValueError(f"{smoothq_act_scale_path=} is not a .pt checkpoint")
@@ -179,6 +179,102 @@ def test_save_config_minimal(
 
     delete_config()
 
+
+def test_double_qconfig_save(
+    config_fp32: dict,
+):
+    """
+    Ensure that calling qconfig_save twice in a row on the same config doesn't fail.
+
+    Args:
+        config_fp32 (dict): Config for fp32 quantization
+    """
+    delete_config()
+
+    # The repeated save overwrites qcfg.json; pytest.warns asserts the warning fires
+    with pytest.warns(UserWarning, match="qcfg.json already exist, will overwrite."):
+        qconfig_save(config_fp32, minimal=False)
+        qconfig_save(config_fp32, minimal=False)
+
+    delete_config()
+
+def test_qconfig_save_list_as_dict(
+    config_fp32: dict,
+):
+    """
+    Test that the in-config "keys_to_save" entry can't be given as a dictionary
+
+    Args:
+        config_fp32 (dict): Config for fp32 quantization
+    """
+    delete_config()
+
+    # keys_to_save is set to a dict (values are nonsense) instead of a list of keys
+    config_fp32["keys_to_save"] = {
+        "qa_mode": None,
+        "qw_mode": None,
+        "smoothq": None,
+        "smoothq_scale_layers": None,
+        "qskip_layer_name": None,
+        "qskip_large_mag_layers": None,
+    }
+
+    with pytest.raises(ValueError):
+        qconfig_save(config_fp32, minimal=True)
+
+    delete_config()
+
+
+def test_qconfig_save_recipe_as_dict(
+    config_fp32: dict,
+):
+    """
+    Test that a save recipe file holding a dictionary (not a list) is rejected
+
+    Args:
+        config_fp32 (dict): Config for fp32 quantization
+    """
+    delete_config()
+
+    # Write a dict (values are nonsense) where a list of keys to save is expected
+    save_dict = {
+        "qa_mode": None,
+        "qw_mode": None,
+        "smoothq": None,
+        "smoothq_scale_layers": None,
+        "qskip_layer_name": None,
+        "qskip_large_mag_layers": None,
+    }
+    save_json(save_dict, file_path="keys_to_save.json")
+
+    # NOTE(review): confirm delete_config() also removes keys_to_save.json
+    with pytest.raises(ValueError, match="is not a list"):
+        qconfig_save(config_fp32, recipe="keys_to_save.json", minimal=True)
+
+    delete_config()
+
+
+def test_qconfig_load_with_recipe_as_list(
+    config_fp32: dict,
+):
+    """
+    Test that qconfig_load raises a ValueError when the json file holds a list
+
+    Args:
+        config_fp32 (dict): Config for fp32 quantization
+    """
+    delete_config()
+
+    # A json list is an accepted recipe shape, but not a valid quantization config
+    config_list = list(config_fp32)
+    save_json(config_list, file_path="qcfg.json")
+
+    with pytest.raises(ValueError, match="is not a dictionary"):
+        qconfig_load(fname="qcfg.json")
+
+    delete_config()
+
+
 def test_load_config_restored_pair(
     config_fp32: dict,
     wanted_pair: tuple,