diff --git a/README.md b/README.md
index 9768d2d0..3875893a 100644
--- a/README.md
+++ b/README.md
@@ -169,11 +169,13 @@ A more accessible, comprehensive, and efficient toolkit for large model compress
       <td>
         <ul style="padding-left: 0; list-style-position: inside;">
           <li><a href="https://huggingface.co/collections/Qwen/qwen3-omni">Qwen3-Omni</a></li>
+          <li><a href="https://huggingface.co/collections/Qwen/qwen2-audio">Qwen2-Audio</a></li>
         </ul>
       </td>
       <td>
         <ul style="padding-left: 0; list-style-position: inside;">
           <li><a href="https://github.com/Tencent/AngelSlim/blob/main/docs/source/models/qwen3_omni/qwen3_omni_quant.md">FP8-Static/Dynamic</a></li>
+          <li><a href="https://github.com/Tencent/AngelSlim/tree/main/configs/qwen2_audio">INT8-Dynamic</a></li>
         </ul>
       </td>
       <td>
diff --git a/README_cn.md b/README_cn.md
index 6085e27d..8b9d262e 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -170,11 +170,13 @@
       <td>
         <ul style="padding-left: 0; list-style-position: inside;">
           <li><a href="https://huggingface.co/collections/Qwen/qwen3-omni">Qwen3-Omni</a></li>
+          <li><a href="https://huggingface.co/collections/Qwen/qwen2-audio">Qwen2-Audio</a></li>
         </ul>
       </td>
       <td>
         <ul style="padding-left: 0; list-style-position: inside;">
           <li><a href="https://github.com/Tencent/AngelSlim/blob/main/docs/source/models/qwen3_omni/qwen3_omni_quant.md">FP8-Static/Dynamic</a></li>
+          <li><a href="https://github.com/Tencent/AngelSlim/tree/main/configs/qwen2_audio">INT8-Dynamic</a></li>
         </ul>
       </td>
       <td>
diff --git a/angelslim/data/audio_dataset.py b/angelslim/data/audio_dataset.py
new file mode 100644
index 00000000..0a66e468
--- /dev/null
+++ b/angelslim/data/audio_dataset.py
@@ -0,0 +1,139 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+from typing import Dict, List, Union
+
+import requests
+from transformers import ProcessorMixin
+from transformers.pipelines.audio_utils import ffmpeg_read
+
+from .base_dataset import BaseDataset
+
+
+class AudioDataset(BaseDataset):
+    """Dataset for multimodal (text + audio) data"""
+
+    def __init__(
+        self,
+        processor: ProcessorMixin,
+        device: str = "cpu",
+        max_length: int = 4096,
+        num_samples: int = -1,
+        data_source: Union[str, Dict] = None,
+        is_hf_dataset: bool = False,
+        model_name: str = None,
+    ):
+        super().__init__(processor, device, max_length)
+        self.is_hf_dataset = is_hf_dataset
+        self.model_name = model_name
+
+        self._load_file_based_dataset(data_source, num_samples)
+
+    def _load_file_based_dataset(self, data_path: str, num_samples: int):
+        """Load up to num_samples JSONL records (all if <= 0) from data_path"""
+        audio_dir = os.path.join(os.path.dirname(data_path), "audios")
+        line_count = 0
+
+        # Explicit UTF-8: records may hold non-ASCII text (e.g. Chinese answers)
+        with open(data_path, "r", encoding="utf-8") as f:
+            for line in f:
+                if num_samples > 0 and line_count >= num_samples:
+                    break
+
+                data = json.loads(line.strip())
+                # URLs are used as-is; other paths resolve under "audios/"
+                if data["audio_path"].startswith(("http://", "https://")):
+                    audio_path = data["audio_path"]
+                else:
+                    audio_path = os.path.join(audio_dir, data["audio_path"])
+
+                # Prepare chat messages with audio
+                messages = [
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "audio", "audio_url": audio_path},
+                            {
+                                "type": "text",
+                                "text": data["question"].replace("<audio>", ""),
+                            },
+                        ],
+                    },
+                    {
+                        "role": "assistant",
+                        "content": [{"type": "text", "text": data["answer"]}],
+                    },
+                ]
+
+                self._process_and_append(messages)
+                line_count += 1
+
+    def _process_and_append(self, messages: List[Dict]):
+        """Render messages with the chat template and append the processor output"""
+
+        input_text = self.processor.apply_chat_template(
+            messages, add_generation_prompt=True, tokenize=False
+        )
+        input_audios = self._extract_audio_info(messages)
+
+        # Run the processor on the rendered text and the loaded audio clips
+        inputs = self.processor(
+            text=input_text,
+            audio=input_audios,
+            sampling_rate=self.processor.feature_extractor.sampling_rate,
+            return_tensors="pt",
+            padding=True,
+        )
+        self.data.append(inputs)
+
+    @staticmethod
+    def read_audio(audio_path):
+        """Return raw audio bytes read from a local path or an http(s) URL"""
+        if audio_path.startswith(("http://", "https://")):
+            # Bound the wait and fail on HTTP errors so an error page is
+            # never handed to the decoder as if it were audio bytes.
+            response = requests.get(audio_path, timeout=60)
+            response.raise_for_status()
+            return response.content
+        with open(audio_path, "rb") as f:
+            return f.read()
+
+    def _extract_audio_info(self, messages: List[Dict]) -> list:
+        """Load and decode every audio item referenced in messages"""
+        audios = []
+        sampling_rate = self.processor.feature_extractor.sampling_rate
+
+        for message in messages:
+            content = message.get("content", [])
+            if not isinstance(content, list):
+                continue
+
+            for item in content:
+                if item.get("type") == "audio":
+                    # Only string sources (local path or URL) are loaded
+                    if isinstance(item["audio_url"], str):
+                        try:
+                            audios.append(
+                                ffmpeg_read(
+                                    self.read_audio(item["audio_url"]),
+                                    sampling_rate=sampling_rate,
+                                )
+                            )
+                        except ValueError as e:
+                            raise ValueError(
+                                f"Could not open audio file: {item['audio_url']}, {e}"
+                            ) from e
+        return audios
diff --git a/angelslim/data/dataloader.py b/angelslim/data/dataloader.py
index c41b755d..1e7b9160 100644
--- a/angelslim/data/dataloader.py
+++ b/angelslim/data/dataloader.py
@@ -18,6 +18,7 @@
 from torch.utils.data import DataLoader
 from transformers import ProcessorMixin
 
+from .audio_dataset import AudioDataset
 from .base_dataset import BaseDataset
 from .multimodal_dataset import MultiModalDataset
 from .omni_dataset import OmniDataset
@@ -110,6 +111,16 @@ def create_data_loader(
                 is_hf_dataset=not os.path.isfile(data_source),
                 use_audio_in_video=use_audio_in_video,
             )
+        elif data_type == "AudioDataset":
+            dataset = AudioDataset(
+                processor=processor,
+                device=device,
+                max_length=max_length,
+                num_samples=num_samples,
+                data_source=data_source,
+                is_hf_dataset=not os.path.isfile(data_source),
+                model_name=model_name,
+            )
         else:
             raise ValueError(f"Unsupported data type: {data_type}")
 
diff --git a/angelslim/engine.py b/angelslim/engine.py
index 1d435811..3ec3e3af 100644
--- a/angelslim/engine.py
+++ b/angelslim/engine.py
@@ -106,7 +106,7 @@ def prepare_model(
 
         self.series = SlimModelFactory.get_series_by_models(model_name)
 
-        if self.series in ["LLM", "VLM"]:
+        if self.series in ["LLM", "VLM", "Audio"]:
             if model:
                 assert tokenizer, " If model is set, tokenizer must be also set."
                 self.slim_model.tokenizer = tokenizer
@@ -162,7 +162,7 @@ def prepare_data(
             data_type=data_type,
             processor=(
                 self.slim_model.processor
-                if self.series == "VLM" or self.series == "Omni"
+                if self.series in ["VLM", "Omni", "Audio"]
                 else self.slim_model.tokenizer
             ),
             device=self.slim_model.model.device,
@@ -205,7 +205,7 @@ def prepare_compressor(
                     f"Compression method '{method_name}' not registered. "
                     f"Available methods: {CompressorFactory.get_available_compressor()}"
                 )
-        if self.series in ["LLM", "VLM", "Omni"]:
+        if self.series in ["LLM", "VLM", "Omni", "Audio"]:
             global_config.update(self.model_path, self.max_seq_length)
 
         if default_method:
diff --git a/angelslim/models/__init__.py b/angelslim/models/__init__.py
index 4036371a..b73a4521 100644
--- a/angelslim/models/__init__.py
+++ b/angelslim/models/__init__.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from .audio import *  # noqa: F401 F403
 from .diffusion import *  # noqa: F401 F403
 from .llm import *  # noqa: F401 F403
 from .model_factory import SlimModelFactory  # noqa: F401
diff --git a/angelslim/models/audio/__init__.py b/angelslim/models/audio/__init__.py
new file mode 100644
index 00000000..54e1528f
--- /dev/null
+++ b/angelslim/models/audio/__init__.py
@@ -0,0 +1,16 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .qwen2_audio import Qwen2_Audio  # noqa: F401
diff --git a/angelslim/models/audio/qwen2_audio.py b/angelslim/models/audio/qwen2_audio.py
new file mode 100644
index 00000000..5afaeaba
--- /dev/null
+++ b/angelslim/models/audio/qwen2_audio.py
@@ -0,0 +1,185 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+import torch
+from tqdm import tqdm
+from transformers import (
+    AutoProcessor,
+    AutoTokenizer,
+    Qwen2AudioForConditionalGeneration,
+)
+
+from ...compressor.quant.core import PTQVLMSaveVllmHF
+from ...utils import find_layers, print_info
+from ..base_model import BaseLLMModel
+from ..model_factory import SlimModelFactory
+
+
+@SlimModelFactory.register
+class Qwen2_Audio(BaseLLMModel):
+    def __init__(
+        self,
+        model=None,
+        deploy_backend="vllm",
+    ):
+        super().__init__(
+            model=model,
+            deploy_backend=deploy_backend,
+        )
+        self.modal_type = "Audio"
+        self.block_name = "language_model.model.layers"
+        self.audio_block_name = "audio_tower.layers"
+
+    def from_pretrained(
+        self,
+        model_path,
+        torch_dtype="auto",
+        device_map="auto",
+        trust_remote_code=True,
+        low_cpu_mem_usage=True,
+        use_cache=False,
+        using_multi_nodes=False,
+    ):
+        self.model = Qwen2AudioForConditionalGeneration.from_pretrained(
+            model_path,
+            torch_dtype=torch_dtype,
+            device_map=device_map,
+            trust_remote_code=trust_remote_code,
+            low_cpu_mem_usage=low_cpu_mem_usage,
+        )
+
+        # Load tokenizer
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            model_path, trust_remote_code=trust_remote_code
+        )
+
+        # Load processor
+        self.processor = AutoProcessor.from_pretrained(
+            model_path, trust_remote_code=trust_remote_code
+        )
+
+    def get_observer_layers(self):
+        """Select layers to observe and record the unmatched ones as ignore_layers"""
+        names = [
+            "self_attn.k_proj",
+            "self_attn.v_proj",
+            "self_attn.q_proj",
+            "self_attn.o_proj",
+            "mlp.up_proj",
+            "mlp.gate_proj",
+            "mlp.down_proj",
+        ]
+
+        quant_audio = getattr(self.quant_config, "quant_audio", False)
+        if quant_audio:
+            # NOTE(review): confirm fc1/fc2 really sit under self_attn in audio_tower
+            audio_tower_names = [
+                "self_attn.k_proj",
+                "self_attn.v_proj",
+                "self_attn.q_proj",
+                "self_attn.out_proj",
+                "self_attn.fc1",
+                "self_attn.fc2",
+            ]
+            names.extend(audio_tower_names)
+
+        observer_layers_dict = {}
+        layers_dict = find_layers(self.model, layers=self.observer_layer_classes)
+
+        ignore_layers = self.skip_layer_names()
+        for name, module in layers_dict.items():
+            block_condition = name.startswith(self.block_name) or (
+                quant_audio and name.startswith(self.audio_block_name)
+            )
+            result = ".".join(name.split(".")[-2:])
+            if block_condition and result in names:
+                observer_layers_dict[name] = module
+            else:
+                ignore_layers.append(name)
+        self.quant_config.quant_algo_info["ignore_layers"] = ignore_layers
+
+        if self.quant_config.custom_observe_layers_names != "default":
+            for custom_observe_name in self.quant_config.custom_observe_layers_names:
+                for default_name in list(observer_layers_dict):  # copy: popped below
+                    if custom_observe_name not in default_name:
+                        observer_layers_dict.pop(default_name)
+        return observer_layers_dict
+
+    def get_smooth_mapping_layers(self, smooth_config, mappings=None):
+        if mappings is None:
+            mappings = [
+                (["q_proj", "k_proj", "v_proj"], "input_layernorm"),
+                (["gate_proj", "up_proj"], "post_attention_layernorm"),
+            ]
+        print(f"smooth mappings={mappings}")
+        assert len(mappings) == 2
+        assert smooth_config.smooth_first_linears or smooth_config.smooth_last_linears
+        # TODO: support smooth_last_linears
+        return super().get_smooth_mapping_layers(smooth_config, mappings)
+
+    def get_parent_dict(self, observer_layers_dict):
+        parent_mapping = {r"experts\.\d+": "experts"}
+        parent_dict = {}
+        for layer_name in observer_layers_dict.keys():
+            parent_name = layer_name
+            for k, v in parent_mapping.items():
+                parent_name = re.sub(k, v, parent_name)  # compose all mappings
+            if parent_name != layer_name:
+                parent_dict[layer_name] = parent_name
+        return parent_dict
+
+    def model_forward(self, dataloader, **kwargs):
+        """Run every calibration batch through the model with gradients off"""
+        self.model.use_cache = False
+
+        calibrated_cnt = 0
+        algo = self.quant_config.quant_algo
+        if any(tag in algo for tag in ("gptq", "awq", "gptaq")):
+            device = "cuda:0"
+        else:
+            device = self.model.device
+        print_info(f"device is {device}")
+
+        if dataloader is None:
+            return
+
+        with torch.no_grad():
+            progress = tqdm(dataloader, desc="calibrating...", total=len(dataloader))
+            for batch in progress:
+                inputs = {key: value.to(device) for key, value in batch.items()}
+                inputs["use_cache"] = False
+                try:
+                    _ = self.model(**inputs)
+                except ValueError:
+                    # A ValueError raised by the forward pass is swallowed
+                    # and the batch is still counted as calibrated.
+                    pass
+                calibrated_cnt += 1
+
+    def get_quant_module(self):
+        """
+        Returns the module that will be quantized.
+        This is typically the main transformer module of the model.
+        """
+        return self.model.language_model.model.layers
+
+    def get_save_func(self):
+        if self.deploy_backend in ["vllm", "huggingface"]:
+            return PTQVLMSaveVllmHF
+        else:
+            raise NotImplementedError(
+                f"deploy_backend {self.deploy_backend} is not supported for saving."
+            )
diff --git a/angelslim/models/model_factory.py b/angelslim/models/model_factory.py
index 8e029961..111f6114 100644
--- a/angelslim/models/model_factory.py
+++ b/angelslim/models/model_factory.py
@@ -22,7 +22,7 @@ class SlimModelFactory:
     registry: Dict[str, Type] = {}
     series_registry: Dict[str, str] = {}
 
-    ALLOWED_SERIES = ("LLM", "VLM", "Diffusion", "Omni")
+    ALLOWED_SERIES = ("LLM", "VLM", "Diffusion", "Omni", "Audio")
 
     @classmethod
     def register(cls, model_class: Type) -> Type:
@@ -41,6 +41,8 @@ def register(cls, model_class: Type) -> Type:
             series = "Diffusion"
         elif "omni" in module_path:
             series = "Omni"
+        elif "audio" in module_path:
+            series = "Audio"
         else:
             raise ValueError(
                 f"model_class '{class_name}' is not in a valid series: {cls.ALLOWED_SERIES}"  # noqa: E501
diff --git a/angelslim/utils/config_parser.py b/angelslim/utils/config_parser.py
index f7aa6548..5c048f73 100644
--- a/angelslim/utils/config_parser.py
+++ b/angelslim/utils/config_parser.py
@@ -60,7 +60,7 @@ class GlobalConfig:
     save_path: str = field(default="./output")
     # Shared max_seq_length configuration
     max_seq_length: int = field(default=2048)
-    hidden_size: int = field(default=2048)
+    hidden_size: int = field(default=4096)
     model_arch_type: str = field(default=None)
     absolute_model_path: str = field(default=None)
     deploy_backend: str = field(default="vllm")
@@ -91,16 +91,24 @@ def get_max_seq_length(self) -> int:
 
     def set_model_hidden_size(self, model_path) -> int:
         json_data = get_hf_config(model_path)
-        if json_data["model_type"] in ["qwen3_vl"]:
-            self.hidden_size = json_data["text_config"]["hidden_size"]
-        elif (
-            json_data["architectures"][0]
-            if isinstance(json_data["architectures"], list)
-            else json_data["architectures"]
-        ) == "Qwen3OmniMoeForConditionalGeneration":
-            self.hidden_size = json_data["thinker_config"]["text_config"]["hidden_size"]
-        else:
-            self.hidden_size = json_data["hidden_size"]
+        try:
+            if json_data["model_type"] in ["qwen3_vl"]:
+                self.hidden_size = json_data["text_config"]["hidden_size"]
+            elif (
+                json_data["architectures"][0]
+                if isinstance(json_data["architectures"], list)
+                else json_data["architectures"]
+            ) == "Qwen3OmniMoeForConditionalGeneration":
+                self.hidden_size = json_data["thinker_config"]["text_config"][
+                    "hidden_size"
+                ]
+            else:
+                self.hidden_size = json_data["hidden_size"]
+        except KeyError:
+            print(
+                "Warning: Failed to set model hidden size from config.json. "
+                f"Using default hidden size {self.hidden_size}."
+            )
 
     def set_model_arch_type(self, model_path) -> str:
         json_data = get_hf_config(model_path)
@@ -247,9 +255,6 @@ def need_dataset(self) -> bool:
         for method in self.name:
             # PTQ/QAT usually need calibration dataset
             if method in ["PTQ", "QAT"]:
-                # Check if dynamic quantization (usually doesn't need dataset)
-                if self.quantization and "dynamic" in self.quantization.name:
-                    continue
                 # Check if specific quantization helpers need dataset
                 if (
                     self.quantization
@@ -257,6 +262,9 @@ def need_dataset(self) -> bool:
                     and "smooth" in self.quantization.quant_helpers
                 ):
                     return True
+                # Check if dynamic quantization (usually doesn't need dataset)
+                if self.quantization and "dynamic" in self.quantization.name:
+                    continue
                 # Default PTQ/QAT needs dataset
                 return True
         return False
diff --git a/configs/qwen2_audio/fp8_dynamic/qwen2_audio_7b_fp8_dynamic.yaml b/configs/qwen2_audio/fp8_dynamic/qwen2_audio_7b_fp8_dynamic.yaml
new file mode 100644
index 00000000..d62d6b6a
--- /dev/null
+++ b/configs/qwen2_audio/fp8_dynamic/qwen2_audio_7b_fp8_dynamic.yaml
@@ -0,0 +1,25 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output
+
+# Simplified Configuration for LLM compression
+model:
+  name: Qwen2_Audio
+  model_path: Qwen/Qwen2-Audio-7B
+  trust_remote_code: true
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: auto
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    name: fp8_dynamic
+    bits: 8
+    quant_method:
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
diff --git a/configs/qwen2_audio/fp8_static/qwen2_audio_7b_fp8_static.yaml b/configs/qwen2_audio/fp8_static/qwen2_audio_7b_fp8_static.yaml
new file mode 100644
index 00000000..8a50e75c
--- /dev/null
+++ b/configs/qwen2_audio/fp8_static/qwen2_audio_7b_fp8_static.yaml
@@ -0,0 +1,33 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output
+
+# Simplified Configuration for LLM compression
+model:
+  name: Qwen2_Audio
+  model_path: Qwen/Qwen2-Audio-7B
+  trust_remote_code: true
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: auto
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    name: fp8_static
+    bits: 8
+    quant_method:
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
+
+# Dataset for calibration
+dataset:
+  name: AudioDataset
+  data_path: ./dataset/audio_fake_data/fake_data.json
+  max_seq_length: 4096
+  num_samples: 256
+  batch_size: 1
diff --git a/configs/qwen2_audio/int8_dynamic/qwen2_audio_7b_int8_dynamic.yaml b/configs/qwen2_audio/int8_dynamic/qwen2_audio_7b_int8_dynamic.yaml
new file mode 100644
index 00000000..50ca9bac
--- /dev/null
+++ b/configs/qwen2_audio/int8_dynamic/qwen2_audio_7b_int8_dynamic.yaml
@@ -0,0 +1,25 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output
+
+# Simplified Configuration for LLM compression
+model:
+  name: Qwen2_Audio
+  model_path: Qwen/Qwen2-Audio-7B
+  trust_remote_code: true
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: auto
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    name: int8_dynamic
+    bits: 8
+    quant_method:
+      weight: "per-channel"
+      activation: "per-token"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
diff --git a/configs/qwen2_audio/smooth_int8/qwen2_audio_7b_int8_dynamic_smooth.yaml b/configs/qwen2_audio/smooth_int8/qwen2_audio_7b_int8_dynamic_smooth.yaml
new file mode 100644
index 00000000..76493d4e
--- /dev/null
+++ b/configs/qwen2_audio/smooth_int8/qwen2_audio_7b_int8_dynamic_smooth.yaml
@@ -0,0 +1,35 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output
+
+# Simplified Configuration for LLM compression
+model:
+  name: Qwen2_Audio
+  model_path: Qwen/Qwen2-Audio-7B
+  trust_remote_code: true
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: auto
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    name: int8_dynamic
+    bits: 8
+    quant_method:
+      weight: "per-channel"
+      activation: "per-token"
+    quant_helpers:
+      - "smooth"
+    ignore_layers:
+      - "lm_head"
+
+# Dataset for calibration
+dataset:
+  name: AudioDataset
+  data_path: ./dataset/audio_fake_data/fake_data.json
+  max_seq_length: 4096
+  num_samples: 256
+  batch_size: 1
diff --git a/dataset/audio_fake_data/audios/1.wav b/dataset/audio_fake_data/audios/1.wav
new file mode 100644
index 00000000..256e4afd
Binary files /dev/null and b/dataset/audio_fake_data/audios/1.wav differ
diff --git a/dataset/audio_fake_data/audios/2.wav b/dataset/audio_fake_data/audios/2.wav
new file mode 100644
index 00000000..1f662557
Binary files /dev/null and b/dataset/audio_fake_data/audios/2.wav differ
diff --git a/dataset/audio_fake_data/fake_data.json b/dataset/audio_fake_data/fake_data.json
new file mode 100755
index 00000000..130dd12e
--- /dev/null
+++ b/dataset/audio_fake_data/fake_data.json
@@ -0,0 +1,2 @@
+{"question": "Detect the language and recognize the speech: ", "answer": "甚至出现交易几乎停滞的情况","audio_path": "./1.wav","type": "multimodal"}
+{"question": "Detect the language and recognize the speech: ","answer": "换一首歌","audio_path": "./2.wav","type": "multimodal"}
\ No newline at end of file
diff --git a/docs/source/index.md b/docs/source/index.md
index 51656fba..ad98e76d 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -98,7 +98,9 @@ AngelSlim是腾讯自研的，致力于打造更易用、更全面和更高效
          - 建设中
    * - **语音(TTS/ASR)**
      - - Qwen3-Omni
+       - Qwen2-Audio
      - - FP8-Static/Dynamic
+       - INT8-Dynamic
      - - 建设中
      - - **Token剪枝**