add Qwen3-Coder-480B-A35B-Instruct low_memory config (#68)

RuBing-Yang · web-flow · commit 97d0b2532eca · 2025-09-08T19:26:47.000+08:00
diff --git a/configs/qwen3/fp8_dynamic/qwen3_coder-a35b_fp8_dynamic_low_memory.yaml b/configs/qwen3/fp8_dynamic/qwen3_coder-a35b_fp8_dynamic_low_memory.yaml
@@ -0,0 +1,27 @@
+# Global configuration of the pipeline
+global:
+  save_path: ./output  # relative path; presumably the pipeline's output directory — confirm
+
+# Model section of the simplified LLM-compression configuration
+model:
+  name: Qwen
+  model_path: Qwen/Qwen3-Coder-480B-A35B-Instruct  # looks like a Hugging Face Hub repo ID — confirm
+  trust_remote_code: true  # NOTE(review): presumably permits code shipped with the checkpoint — confirm
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: cpu  # NOTE(review): several keys here resemble transformers loading kwargs — confirm
+
+# Compression configuration
+compression:
+  name: PTQ  # presumably post-training quantization — confirm
+  quantization:
+    low_memory: true  # the switch this "low_memory" config variant is named for
+    name: fp8_dynamic
+    bits: 8
+    quant_method:  # the same granularity string is set for weights and activations
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
+      - "model.embed_tokens"
diff --git a/configs/qwen3/fp8_static/qwen3_coder-a35b_fp8_static_low_memory.yaml b/configs/qwen3/fp8_static/qwen3_coder-a35b_fp8_static_low_memory.yaml
@@ -0,0 +1,35 @@
+# Global configuration of the pipeline
+global:
+  save_path: ./output  # relative path; presumably the pipeline's output directory — confirm
+
+# Model section of the simplified LLM-compression configuration
+model:
+  name: Qwen
+  model_path: Qwen/Qwen3-Coder-480B-A35B-Instruct  # looks like a Hugging Face Hub repo ID — confirm
+  trust_remote_code: true  # NOTE(review): presumably permits code shipped with the checkpoint — confirm
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: cpu  # NOTE(review): several keys here resemble transformers loading kwargs — confirm
+
+# Compression configuration
+compression:
+  name: PTQ  # presumably post-training quantization — confirm
+  quantization:
+    low_memory: true  # the switch this "low_memory" config variant is named for
+    name: fp8_static  # this file also defines a calibration dataset section below
+    bits: 8
+    quant_method:  # the same granularity string is set for weights and activations
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
+      - "model.embed_tokens"
+
+# Calibration dataset
+dataset:
+  name: TextDataset
+  data_path: ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl  # NOTE(review): "a22B" file vs. A35B model — confirm
+  max_seq_length: 4096  # presumably measured in tokens — confirm
+  num_samples: 256
+  batch_size: 1