Skip to content

Commit ba95205

Browse files
authored
Merge pull request #457 from Michael20070814/main
add the recommended python version
2 parents 4fe480d + e0117aa commit ba95205

7 files changed

Lines changed: 197 additions & 10 deletions

File tree

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ save*
2222
.log
2323
*.pid
2424
*.ipynb*
25+
.venv/
26+
*.sh

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/llmcompression:pure-lates
3434

3535
**Docs**: [English](https://llmc-en.readthedocs.io/en/latest/), [Chinese](https://llmc-zhcn.readthedocs.io/en/latest/).
3636

37+
> **Recommended Python Version**: We recommend using **Python 3.11** for local development and installation. This matches the project's Docker images and CI configuration, and is generally more stable than Python 3.12 for the current dependency set.
38+
3739
## :fire: Latest News
3840

3941
- **Nov 9, 2025:** 🍺🍺🍺 Our work [**LLMC+: Benchmarking Vision-Language Model Compression with a Plug-and-play Toolkit**](https://arxiv.org/abs/2508.09981) has been accepted by AAAI 2026.

README_zh.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/llmcompression:pure-lates
3131

3232
**文档**:[English](https://llmc-en.readthedocs.io/en/latest/),[中文](https://llmc-zhcn.readthedocs.io/en/latest/)
3333

34+
> **推荐 Python 版本**:建议本地开发和安装使用 **Python 3.11**。这与项目的 Docker 镜像和 CI 配置保持一致,并且对当前依赖集合而言通常比 Python 3.12 更稳定。
35+
3436
## :fire: 最新动态
3537

3638
- **2025年8月13日:** 🚀 我们已开源针对 **视觉语言模型(VLMs)** 的压缩方案,支持共计超过 **20 种算法**,涵盖 **token reduction** 和 **quantization**。此次发布为多模态任务提供了灵活、即插即用的压缩策略。具体请参阅[文档](https://llmc-zhcn.readthedocs.io/en/latest/advanced/token_reduction.html)。

configs/quantization/methods/KVQuant/rtn_w_a_pertensor_static_naive_quant_kv.yml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,25 @@
11
base:
22
seed: &seed 42
33
model:
4-
type: model_type
5-
path: model path
4+
type: Qwen3
5+
path: /home/michael/Project/models/Qwen3-0.6B
66
torch_dtype: auto
77
calib:
88
name: pileval
99
download: False
10-
path: calib data path
10+
path: /home/michael/Project/calib/pileval
11+
n_sample: 128
1112
n_samples: 128
1213
bs: 1
1314
seq_len: 2048
1415
preproc: txt_general_preproc
1516
seed: *seed
1617
eval:
17-
eval_pos: [transformed, fake_quant, fake_quant_wo_kv] #long_ppl eval not support pretrain eval pos
18+
eval_pos: [] #long_ppl eval not support pretrain eval pos
1819
name: wikitext2
1920
type: decode_ppl
2021
download: False
21-
path: eval_data_path
22+
path: /home/michael/Project/llmc_datasets/wikitext2
2223
bs: 1
2324
inference_per_block: False
2425
num_samples: 10
@@ -41,5 +42,7 @@ quant:
4142
symmetric: True
4243
granularity: per_tensor
4344
save:
45+
save_calib_json: True
46+
calib_json_name: kv_cache_calib.json
4447
save_fake: False
45-
save_path: /path/to/save/
48+
save_path: /home/michael/Project/llmc_save

llmc/__main__.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,27 @@
2626

2727

2828
def main(config):
29+
# 从注册表拿模型并实例化
30+
# 动态分配模型
2931
model = MODEL_REGISTRY[config.model.type](config)
3032

33+
# 打印模型和tokenizer
3134
logger.info(f'model: {model}')
3235
logger.info(f'tokenizer: {model.get_tokenizer()}')
3336

37+
# 获得需要的评测种类
3438
eval_list = get_eval_list(model, config)
39+
# 真正执行评测
3540
eval_model(model, None, eval_list, eval_pos='pretrain')
3641

3742
blockwise_opts = []
43+
# 取出处理模态
3844
modalities, modality_configs = get_modality(config)
3945

4046
for modality, modality_config in zip(modalities, modality_configs):
4147
model.set_modality(modality)
4248
if not config.get('calib', False):
49+
# 不需要校准数据 直接构造算法对象
4350
blockwise_opt = ALGO_REGISTRY[modality_config.method](
4451
model,
4552
modality_config,
@@ -51,30 +58,54 @@ def main(config):
5158
blockwise_opts.append(blockwise_opt)
5259
dist.barrier()
5360
else:
61+
# 需要校准数据
5462
dataset = BaseDataset(
5563
model.get_tokenizer(), config.calib, model.batch_process
5664
)
5765
calib_data, padding_mask = dataset.get_calib_dataset()
66+
# 收集第一层block输入 为后续blockwise算法需要的输入缓存下来
5867
model.collect_first_block_input(calib_data, padding_mask)
5968
del calib_data
6069
gc.collect()
6170
torch.cuda.empty_cache()
71+
# 构造算法对象
6272
blockwise_opt = ALGO_REGISTRY[modality_config.method](
6373
model,
6474
modality_config,
6575
model.get_first_block_input(),
6676
model.get_padding_mask(),
6777
config,
6878
)
79+
# 项目逐层block做优化
6980
blockwise_opt.run_block_loop()
7081
blockwise_opts.append(blockwise_opt)
7182
dist.barrier()
7283

84+
# 对变化后的浮点模型做评测
7385
eval_model(model, blockwise_opts, eval_list, eval_pos='transformed')
86+
# 只有rank 0继续做保存和导出
7487
if int(os.environ['RANK']) == 0:
88+
if 'save' in config and config.save.get('save_calib_json', False):
89+
# 收集各个模态/量化器导出的校准结果。
90+
calib_json_list = [
91+
blockwise_opt.collect_calib_json()
92+
for blockwise_opt in blockwise_opts
93+
if hasattr(blockwise_opt, 'collect_calib_json')
94+
]
95+
# 单模态时保持扁平结构,兼容 LightLLM 的校准文件格式。
96+
calib_json_payload = (
97+
calib_json_list[0] if len(calib_json_list) == 1 else calib_json_list
98+
)
99+
# 将最终的校准 JSON 写入配置指定的输出路径。
100+
with open(save_calib_json_path, 'w') as file:
101+
json.dump(calib_json_payload, file, ensure_ascii=False, indent=4)
102+
logger.info(f'save calib json done -- {save_calib_json_path}')
103+
104+
# 保存变换后的浮点模型
75105
if 'save' in config and config.save.get('save_trans', False):
76106
blockwise_opt.save_model(save_trans_path)
77107

108+
# 保存TensorRT-LLM格式并构建engine
78109
if 'save' in config and config.save.get('save_trtllm', False):
79110
blockwise_opt.save_model(save_trtllm_trans_path)
80111
from llmc.utils.export_trtllm import cvt_trtllm_engine
@@ -88,22 +119,28 @@ def main(config):
88119
eval_model(model, blockwise_opts, eval_list, eval_pos='fake_quant')
89120
eval_model(model, blockwise_opts, eval_list, eval_pos='fake_quant_wo_kv')
90121

122+
# 切换到fake quant部署模式再保存
91123
if 'save' in config and config.save.get('save_fake', False):
92124
deploy_all_modality(blockwise_opts, 'fake_quant')
93125
blockwise_opt.save_model(save_fake_path)
94126

95127
if 'save' in config:
128+
# 导出真实量化模型给推理后端
96129
if (
130+
# 导出前进行遍历检查
97131
config.save.get('save_vllm', False)
98132
or config.save.get('save_sgl', False)
99133
or config.save.get('save_lightllm', False)
100134
):
101135
for modality_config in modality_configs:
102136
w, a = modality_config.weight, modality_config.get('act')
103137

138+
# 只允许特定bit类型
104139
if isinstance(w.bit, str):
140+
# 必须对称量化
105141
assert w.symmetric, 'Only symmetric quant is supported.'
106142
assert w.bit in ['e4m3', 'e3m4'], 'Supported quant: w8a16.'
143+
# 有激活量化的话,那激活也要满足对称、bit合法的要求
107144
if a:
108145
assert (
109146
w.symmetric and a.symmetric
@@ -114,6 +151,7 @@ def main(config):
114151
and a.bit in ['e4m3', 'e5m2']
115152
), 'Only WA FP8 quant is supported'
116153
else:
154+
# 是整数则必须是4 or 8
117155
assert w.symmetric, 'Only symmetric quant is supported.'
118156
assert w.bit in [4, 8], 'Supported quant: w4a16, w8a16, w8a8.'
119157
if a:
@@ -130,12 +168,15 @@ def main(config):
130168
blockwise_opt.save_model(save_quant_path)
131169
update_vllm_quant_config(blockwise_opt.model, config, save_quant_path)
132170

171+
# 给特定后端(AutoAWQ导出
133172
elif config.save.get('save_autoawq', False):
134173
for modality_config in modality_configs:
174+
# 只能4 bit 仅含有weight 不支持act
135175
assert (
136176
modality_config.weight.bit in [4] and 'act' not in modality_config
137177
), 'AutoAWQ supports only 4-bit weight-only quantization.'
138178
assert (
179+
# 不能对称量化
139180
not modality_config.weight.symmetric
140181
), 'Only asymmetric quant is supported.'
141182

@@ -161,18 +202,23 @@ def main(config):
161202
blockwise_opt.save_model(save_quant_path)
162203
update_lightx2v_quant_config(save_quant_path)
163204

205+
# 判断是否有opencompass
164206
if 'opencompass' in config:
165207
assert config.save.get('save_trans', False)
208+
# 从配置里读取cfg_path, output_path
166209
cfg_path = config['opencompass']['cfg_path']
167210
output_path = config['opencompass']['output_path']
211+
# 取路径
168212
eval_model_path = os.path.abspath(save_trans_path)
213+
# 拼指令
169214
opencompass_cmd = (
170215
f'opencompass {cfg_path} -w {output_path} '
171216
f'--llmc_cfg {args.config} '
172217
f'--llmc_eval_mode quant '
173218
f'--llmc_model_path {eval_model_path}'
174219
)
175220
logger.info(f'opencompass_cmd : {opencompass_cmd}')
221+
# 执行
176222
os.system(opencompass_cmd)
177223
dist.barrier()
178224

@@ -181,20 +227,25 @@ def main(config):
181227
logger.add(sys.stdout, level='INFO')
182228
llmc_start_time = time.time()
183229
parser = argparse.ArgumentParser()
230+
# 解析命令行参数
184231
parser.add_argument('--config', type=str, required=True)
185232
parser.add_argument('--task_id', type=str, required=True)
186233
args = parser.parse_args()
187234

188235
with open(args.config, 'r') as file:
236+
# 读取配置文件
189237
config = yaml.safe_load(file)
190238
config = EasyDict(config)
191239

192240
init_process_group(backend='nccl')
241+
# 初始化分布式环境 设置GPU
193242
torch.cuda.set_device(int(os.environ['LOCAL_RANK']))
194243

244+
# 检查配置 打印依赖版本
195245
if int(os.environ['RANK']) != 0:
196246
logger.remove()
197247

248+
# 检查配置是否合法
198249
check_config(config)
199250

200251
logger.info(f'args: {args}')
@@ -209,6 +260,12 @@ def main(config):
209260
# Ensure only the main process creates directories
210261
if int(os.environ['RANK']) == 0:
211262
if 'save' in config:
263+
if config.save.get('save_calib_json', False):
264+
mkdirs(config.save.save_path)
265+
save_calib_json_path = os.path.join(
266+
config.save.save_path,
267+
config.save.get('calib_json_name', 'calib_scales.json'),
268+
)
212269
if config.save.get('save_trans', False):
213270
save_trans_path = os.path.join(
214271
config.save.save_path, 'transformed_model'
@@ -266,3 +323,4 @@ def main(config):
266323
llmc_duration_time = llmc_end_time - llmc_start_time
267324
logger.info(f'llmc_duration_time: {llmc_duration_time} s')
268325
logger.info('--- llmc finished ---')
326+

0 commit comments

Comments
 (0)