From 715930bea6abf01ef4c144cf15cbb424cb705419 Mon Sep 17 00:00:00 2001
From: RuBing-Yang <beatrix_y@buaa.edu.cn>
Date: Mon, 8 Sep 2025 18:24:02 +0800
Subject: [PATCH] add Qwen3-Coder-480B-A35B-Instruct low_memory config

---
 ...en3_coder-a35b_fp8_dynamic_low_memory.yaml | 27 ++++++++++++++
 ...wen3_coder-a35b_fp8_static_low_memory.yaml | 35 +++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 configs/qwen3/fp8_dynamic/qwen3_coder-a35b_fp8_dynamic_low_memory.yaml
 create mode 100644 configs/qwen3/fp8_static/qwen3_coder-a35b_fp8_static_low_memory.yaml

diff --git a/configs/qwen3/fp8_dynamic/qwen3_coder-a35b_fp8_dynamic_low_memory.yaml b/configs/qwen3/fp8_dynamic/qwen3_coder-a35b_fp8_dynamic_low_memory.yaml
new file mode 100644
index 00000000..483b6c3c
--- /dev/null
+++ b/configs/qwen3/fp8_dynamic/qwen3_coder-a35b_fp8_dynamic_low_memory.yaml
@@ -0,0 +1,27 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output  # relative path; presumably the output directory (confirm)
+
+# Model configuration for LLM compression
+model:
+  name: Qwen
+  model_path: Qwen/Qwen3-Coder-480B-A35B-Instruct  # looks like a hub repo id; TODO confirm
+  trust_remote_code: true  # NOTE(review): presumably lets model-repo code run; confirm needed
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: cpu  # presumably keeps the model on CPU; confirm against the loader
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    low_memory: true  # the option this "low_memory" config file is named for
+    name: fp8_dynamic  # note: this file defines no `dataset` section
+    bits: 8
+    quant_method:
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
+      - "model.embed_tokens"
diff --git a/configs/qwen3/fp8_static/qwen3_coder-a35b_fp8_static_low_memory.yaml b/configs/qwen3/fp8_static/qwen3_coder-a35b_fp8_static_low_memory.yaml
new file mode 100644
index 00000000..e4ee611a
--- /dev/null
+++ b/configs/qwen3/fp8_static/qwen3_coder-a35b_fp8_static_low_memory.yaml
@@ -0,0 +1,35 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output  # relative path; presumably the output directory (confirm)
+
+# Model configuration for LLM compression
+model:
+  name: Qwen
+  model_path: Qwen/Qwen3-Coder-480B-A35B-Instruct  # looks like a hub repo id; TODO confirm
+  trust_remote_code: true  # NOTE(review): presumably lets model-repo code run; confirm needed
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: cpu  # presumably keeps the model on CPU; confirm against the loader
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    low_memory: true  # the option this "low_memory" config file is named for
+    name: fp8_static  # note: this file also defines a `dataset` section for calibration
+    bits: 8
+    quant_method:
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
+      - "model.embed_tokens"
+
+# Dataset for calibration
+dataset:  # NOTE(review): data_path file name says a22B, model_path is A35B; confirm intended
+  name: TextDataset
+  data_path: ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl
+  max_seq_length: 4096  # presumably counted in tokens; TODO confirm
+  num_samples: 256  # presumably the number of calibration samples used; TODO confirm
+  batch_size: 1