add qwen2.5 code 14B dynamic config (#56)

WOODchen7 · woodchenwu · web-flow · commit 2402f69dddf3 · 2025-08-28T16:55:31.000+08:00
Co-authored-by: woodchenwu <woodchenwu@tencent.com>
diff --git a/configs/qwen2_5/fp8_dynamic/qwen2_5-14b_instruct_fp8_dynamic.yaml b/configs/qwen2_5/fp8_dynamic/qwen2_5-14b_instruct_fp8_dynamic.yaml
@@ -0,0 +1,26 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output  # relative path; presumably resolved against the working directory — TODO confirm
+
+# Simplified Configuration for LLM compression
+model:
+  name: Qwen
+  model_path: Qwen/Qwen2.5-14B-Instruct  # NOTE(review): looks like a Hub repo ID rather than a local path — confirm
+  trust_remote_code: true  # NOTE(review): presumably forwarded to the model loader — confirm
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: auto
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    name: fp8_dynamic     # Supported: fp8_static, fp8_dynamic, int4_awq, int4_gptq
+    bits: 8                # Quantization bits (4/8)
+    quant_method:  # same "per-tensor" setting is given for both weight and activation
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
+      - "model.embed_tokens"
diff --git a/configs/qwen2_5/fp8_static/qwen2_5-14b_instruct_fp8_static.yaml b/configs/qwen2_5/fp8_static/qwen2_5-14b_instruct_fp8_static.yaml
@@ -5,7 +5,7 @@ global:
 # Simplified Configuration for LLM compression
 model:
   name: Qwen
-  model_path: Qwen/Qwen2.5-Coder-14B-Instruct
+  model_path: Qwen/Qwen2.5-14B-Instruct
   trust_remote_code: true
   low_cpu_mem_usage: true
   use_cache: false