# Reviewer summary of this patch. These lines precede the first `diff --git` header and belong to no hunk.
#
# New fp8_dynamic configs under configs/qwen2_5/fp8_dynamic/ (global + model + compression sections; no dataset section):
#   - qwen2_5-0_5b_instruct_fp8_dynamic.yaml  (model_path: Qwen/Qwen2.5-0.5B-Instruct)
#   - qwen2_5-3b_instruct_fp8_dynamic.yaml    (model_path: Qwen/Qwen2.5-3B-Instruct)
#   - qwen2_5-72b_instruct_fp8_dynamic.yaml   (model_path: Qwen/Qwen2.5-72B-Instruct)
# New fp8_static configs under configs/qwen2_5/fp8_static/ (same sections plus a `dataset` section for calibration):
#   - qwen2_5-0_5b_instruct_fp8_static.yaml   (model_path: Qwen/Qwen2.5-0.5B-Instruct)
#   - qwen2_5-72b_instruct_fp8_static.yaml    (model_path: Qwen/Qwen2.5-72B-Instruct)
# Rename (similarity index 86%): qwen2_5-7b_fp8_static.yaml -> qwen2_5-3b_instruct_fp8_static.yaml,
#   with model_path changed from Qwen/Qwen2.5-7B-Instruct to Qwen/Qwen2.5-3B-Instruct.
# Calibration data_path changes from ./dataset/sharegpt/sharegpt_gpt4_512.json to
#   ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl in the renamed file and in
#   qwen2_5-7b_fp8_static_low_memory.yaml; the new fp8_static files use the new path from the start.
#
# NOTE(review): the 0.5B fp8_dynamic hunk is `+1,27` and ends with an added blank line, while the 3B/72B
#   fp8_dynamic hunks are `+1,26` - confirm whether the trailing blank line is intended.
# NOTE(review): the rename consumes qwen2_5-7b_fp8_static.yaml and no replacement 7B static config is created
#   in the visible part of this patch - confirm one exists elsewhere.
# NOTE(review): the line breaks of this patch appear to have been collapsed into spaces, so the YAML
#   indentation of the added lines cannot be recovered from this text - regenerate the patch from the
#   repository before applying it.
diff --git a/configs/qwen2_5/fp8_dynamic/qwen2_5-0_5b_instruct_fp8_dynamic.yaml b/configs/qwen2_5/fp8_dynamic/qwen2_5-0_5b_instruct_fp8_dynamic.yaml new file mode 100644 index 00000000..e65710af --- /dev/null +++ b/configs/qwen2_5/fp8_dynamic/qwen2_5-0_5b_instruct_fp8_dynamic.yaml @@ -0,0 +1,27 @@ +# Global configuration of pipeline +global: + save_path: ./output + +# Simplified Configuration for LLM compression +model: + name: Qwen + model_path: Qwen/Qwen2.5-0.5B-Instruct + trust_remote_code: true + low_cpu_mem_usage: true + use_cache: false + torch_dtype: auto + device_map: auto + +# Compression configuration +compression: + name: PTQ + quantization: + name: fp8_dynamic # Supported: fp8_static, fp8_dynamic, int4_awq, int4_gptq + bits: 8 # Quantization bits (4/8) + quant_method: + weight: "per-tensor" + activation: "per-tensor" + ignore_layers: # Skip quantization for these layers + - "lm_head" + - "model.embed_tokens" + diff --git a/configs/qwen2_5/fp8_dynamic/qwen2_5-3b_instruct_fp8_dynamic.yaml b/configs/qwen2_5/fp8_dynamic/qwen2_5-3b_instruct_fp8_dynamic.yaml new file mode 100644 index 00000000..703cbf75 --- /dev/null +++ b/configs/qwen2_5/fp8_dynamic/qwen2_5-3b_instruct_fp8_dynamic.yaml @@ -0,0 +1,26 @@ +# Global configuration of pipeline +global: + save_path: ./output + +# Simplified Configuration for LLM compression +model: + name: Qwen + model_path: Qwen/Qwen2.5-3B-Instruct + trust_remote_code: true + low_cpu_mem_usage: true + use_cache: false + torch_dtype: auto + device_map: auto + +# Compression configuration +compression: + name: PTQ + quantization: + name: fp8_dynamic # Supported: fp8_static, fp8_dynamic, int4_awq, int4_gptq + bits: 8 # Quantization bits (4/8) + quant_method: + weight: "per-tensor" + activation: "per-tensor" + ignore_layers: # Skip quantization for these layers + - "lm_head" + - "model.embed_tokens" diff --git a/configs/qwen2_5/fp8_dynamic/qwen2_5-72b_instruct_fp8_dynamic.yaml 
b/configs/qwen2_5/fp8_dynamic/qwen2_5-72b_instruct_fp8_dynamic.yaml new file mode 100644 index 00000000..23198680 --- /dev/null +++ b/configs/qwen2_5/fp8_dynamic/qwen2_5-72b_instruct_fp8_dynamic.yaml @@ -0,0 +1,26 @@ +# Global configuration of pipeline +global: + save_path: ./output + +# Simplified Configuration for LLM compression +model: + name: Qwen + model_path: Qwen/Qwen2.5-72B-Instruct + trust_remote_code: true + low_cpu_mem_usage: true + use_cache: false + torch_dtype: auto + device_map: auto + +# Compression configuration +compression: + name: PTQ + quantization: + name: fp8_dynamic # Supported: fp8_static, fp8_dynamic, int4_awq, int4_gptq + bits: 8 # Quantization bits (4/8) + quant_method: + weight: "per-tensor" + activation: "per-tensor" + ignore_layers: # Skip quantization for these layers + - "lm_head" + - "model.embed_tokens" diff --git a/configs/qwen2_5/fp8_static/qwen2_5-0_5b_instruct_fp8_static.yaml b/configs/qwen2_5/fp8_static/qwen2_5-0_5b_instruct_fp8_static.yaml new file mode 100644 index 00000000..8dcca5f5 --- /dev/null +++ b/configs/qwen2_5/fp8_static/qwen2_5-0_5b_instruct_fp8_static.yaml @@ -0,0 +1,34 @@ +# Global configuration of pipeline +global: + save_path: ./output + +# Simplified Configuration for LLM compression +model: + name: Qwen + model_path: Qwen/Qwen2.5-0.5B-Instruct + trust_remote_code: true + low_cpu_mem_usage: true + use_cache: false + torch_dtype: auto + device_map: auto + +# Compression configuration +compression: + name: PTQ + quantization: + name: fp8_static # Supported: fp8_static, fp8_dynamic, int4_awq, int4_gptq + bits: 8 # Quantization bits (4/8) + quant_method: + weight: "per-tensor" + activation: "per-tensor" + ignore_layers: # Skip quantization for these layers + - "lm_head" + - "model.embed_tokens" + +# Dataset for calibration +dataset: + name: TextDataset + data_path: ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl + max_seq_length: 4096 + num_samples: 256 + batch_size: 1 diff --git 
a/configs/qwen2_5/fp8_static/qwen2_5-7b_fp8_static.yaml b/configs/qwen2_5/fp8_static/qwen2_5-3b_instruct_fp8_static.yaml similarity index 86% rename from configs/qwen2_5/fp8_static/qwen2_5-7b_fp8_static.yaml rename to configs/qwen2_5/fp8_static/qwen2_5-3b_instruct_fp8_static.yaml index b14bf698..2cfe37c7 100644 --- a/configs/qwen2_5/fp8_static/qwen2_5-7b_fp8_static.yaml +++ b/configs/qwen2_5/fp8_static/qwen2_5-3b_instruct_fp8_static.yaml @@ -5,7 +5,7 @@ global: # Simplified Configuration for LLM compression model: name: Qwen - model_path: Qwen/Qwen2.5-7B-Instruct + model_path: Qwen/Qwen2.5-3B-Instruct trust_remote_code: true low_cpu_mem_usage: true use_cache: false @@ -28,7 +28,7 @@ compression: # Dataset for calibration dataset: name: TextDataset - data_path: ./dataset/sharegpt/sharegpt_gpt4_512.json + data_path: ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl max_seq_length: 4096 num_samples: 256 batch_size: 1 diff --git a/configs/qwen2_5/fp8_static/qwen2_5-72b_instruct_fp8_static.yaml b/configs/qwen2_5/fp8_static/qwen2_5-72b_instruct_fp8_static.yaml new file mode 100644 index 00000000..801f11fa --- /dev/null +++ b/configs/qwen2_5/fp8_static/qwen2_5-72b_instruct_fp8_static.yaml @@ -0,0 +1,34 @@ +# Global configuration of pipeline +global: + save_path: ./output + +# Simplified Configuration for LLM compression +model: + name: Qwen + model_path: Qwen/Qwen2.5-72B-Instruct + trust_remote_code: true + low_cpu_mem_usage: true + use_cache: false + torch_dtype: auto + device_map: auto + +# Compression configuration +compression: + name: PTQ + quantization: + name: fp8_static # Supported: fp8_static, fp8_dynamic, int4_awq, int4_gptq + bits: 8 # Quantization bits (4/8) + quant_method: + weight: "per-tensor" + activation: "per-tensor" + ignore_layers: # Skip quantization for these layers + - "lm_head" + - "model.embed_tokens" + +# Dataset for calibration +dataset: + name: TextDataset + data_path: 
./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl + max_seq_length: 4096 + num_samples: 256 + batch_size: 1 diff --git a/configs/qwen2_5/fp8_static/qwen2_5-7b_fp8_static_low_memory.yaml b/configs/qwen2_5/fp8_static/qwen2_5-7b_fp8_static_low_memory.yaml index 11bcd925..e002ba2d 100644 --- a/configs/qwen2_5/fp8_static/qwen2_5-7b_fp8_static_low_memory.yaml +++ b/configs/qwen2_5/fp8_static/qwen2_5-7b_fp8_static_low_memory.yaml @@ -29,7 +29,7 @@ compression: # Dataset for calibration dataset: name: TextDataset - data_path: ./dataset/sharegpt/sharegpt_gpt4_512.json + data_path: ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl max_seq_length: 4096 num_samples: 256 batch_size: 1