Skip to content

Commit 97d0b25

Browse files
authored
add Qwen3-Coder-480B-A35B-Instruct low_memory config (#68)
1 parent ae2b9ce commit 97d0b25

2 files changed

Lines changed: 62 additions & 0 deletions

File tree

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
# PTQ config for Qwen/Qwen3-Coder-480B-A35B-Instruct using the fp8_dynamic
# quantization scheme with low_memory enabled.
#
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view (indentation was lost and gutter line numbers were interleaved with
# the content). Confirm the structure against the committed file.

# Global configuration of pipeline
global:
  save_path: ./output

# Simplified configuration for LLM compression: the model to load
model:
  name: Qwen
  model_path: Qwen/Qwen3-Coder-480B-A35B-Instruct
  trust_remote_code: true
  low_cpu_mem_usage: true
  use_cache: false
  torch_dtype: auto
  # presumably keeps the weights on CPU while loading - TODO confirm
  device_map: cpu

# Compression configuration
compression:
  name: PTQ
  quantization:
    # Flag this config variant was added for (see the commit title).
    low_memory: true
    name: fp8_dynamic
    bits: 8
    # Weights and activations both use the same granularity setting.
    quant_method:
      weight: "per-tensor"
      activation: "per-tensor"
    # NOTE(review): placement under `quantization` is inferred - confirm.
    ignore_layers:  # Skip quantization for these layers
      - "lm_head"
      - "model.embed_tokens"

# This config defines no `dataset` section.
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
# PTQ config for Qwen/Qwen3-Coder-480B-A35B-Instruct using the fp8_static
# quantization scheme with low_memory enabled and a calibration dataset.
#
# NOTE(review): the nesting below was reconstructed from a flattened diff
# view (indentation was lost and gutter line numbers were interleaved with
# the content). Confirm the structure against the committed file.

# Global configuration of pipeline
global:
  save_path: ./output

# Simplified configuration for LLM compression: the model to load
model:
  name: Qwen
  model_path: Qwen/Qwen3-Coder-480B-A35B-Instruct
  trust_remote_code: true
  low_cpu_mem_usage: true
  use_cache: false
  torch_dtype: auto
  # presumably keeps the weights on CPU while loading - TODO confirm
  device_map: cpu

# Compression configuration
compression:
  name: PTQ
  quantization:
    # Flag this config variant was added for (see the commit title).
    low_memory: true
    name: fp8_static
    bits: 8
    # Weights and activations both use the same granularity setting.
    quant_method:
      weight: "per-tensor"
      activation: "per-tensor"
    # NOTE(review): placement under `quantization` is inferred - confirm.
    ignore_layers:  # Skip quantization for these layers
      - "lm_head"
      - "model.embed_tokens"

# Dataset for calibration
dataset:
  name: TextDataset
  # Relative path to a JSONL file; it must exist before the pipeline runs.
  data_path: ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl
  max_seq_length: 4096
  num_samples: 256
  batch_size: 1

0 commit comments

Comments
 (0)