File tree Expand file tree Collapse file tree
---
# PTQ configuration using the `fp8_dynamic` quantization scheme.
# This document defines no `dataset` section.
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view (indentation was lost) -- confirm it against the consumer's schema.

# Global configuration of pipeline
global:
  save_path: ./output

# Simplified Configuration for LLM compression
model:
  name: Qwen
  model_path: Qwen/Qwen3-Coder-480B-A35B-Instruct
  trust_remote_code: true
  low_cpu_mem_usage: true
  use_cache: false
  torch_dtype: auto
  device_map: cpu

# Compression configuration
compression:
  name: PTQ
  quantization:
    low_memory: true
    name: fp8_dynamic
    bits: 8
    # Quantization granularity for weights and activations.
    # NOTE(review): the captured values had a leading space
    # (" per-tensor"); treated as an extraction artifact and trimmed.
    quant_method:
      weight: "per-tensor"
      activation: "per-tensor"
    ignore_layers:  # Skip quantization for these layers
      - "lm_head"
      - "model.embed_tokens"
---
# PTQ configuration using the `fp8_static` quantization scheme, with a
# calibration dataset section at the end of the document.
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view (indentation was lost) -- confirm it against the consumer's schema.

# Global configuration of pipeline
global:
  save_path: ./output

# Simplified Configuration for LLM compression
model:
  name: Qwen
  model_path: Qwen/Qwen3-Coder-480B-A35B-Instruct
  trust_remote_code: true
  low_cpu_mem_usage: true
  use_cache: false
  torch_dtype: auto
  device_map: cpu

# Compression configuration
compression:
  name: PTQ
  quantization:
    low_memory: true
    name: fp8_static
    bits: 8
    # Quantization granularity for weights and activations.
    # NOTE(review): the captured values had a leading space
    # (" per-tensor"); treated as an extraction artifact and trimmed.
    quant_method:
      weight: "per-tensor"
      activation: "per-tensor"
    ignore_layers:  # Skip quantization for these layers
      - "lm_head"
      - "model.embed_tokens"

# Dataset for calibration
dataset:
  name: TextDataset
  data_path: ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl
  max_seq_length: 4096
  num_samples: 256
  batch_size: 1
You can’t perform that action at this time.
0 commit comments