Tencent · yghstill · Aug 28, 2025 · Aug 22, 2025 · Aug 28, 2025
diff --git a/angelslim/models/llm/__init__.py b/angelslim/models/llm/__init__.py
@@ -18,3 +18,4 @@
 from .kimi_k2 import KimiK2  # noqa: F401
 from .llama import Llama  # noqa: F401
 from .qwen import Qwen  # noqa: F401
+from .seed_oss import SeedOss  # noqa: F401
diff --git a/angelslim/models/llm/seed_oss.py b/angelslim/models/llm/seed_oss.py
@@ -0,0 +1,95 @@
+# Copyright 2025 Tencent Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+import torch.nn as nn
+
+from ...compressor.quant.core import PTQSaveVllmHF
+from ..base_model import BaseLLMModel
+from ..model_factory import SlimModelFactory
+
+
+@SlimModelFactory.register
+class SeedOss(BaseLLMModel):
+    def __init__(
+        self,
+        model=None,
+        deploy_backend="vllm",
+    ):
+        super().__init__(
+            model=model,
+            deploy_backend=deploy_backend,
+        )
+        self.block_name = "model.layers"
+
+    def get_observer_layers(self):
+        names = [
+            "k_proj",
+            "v_proj",
+            "q_proj",
+            "o_proj",
+            "up_proj",
+            "gate_proj",
+            "down_proj",
+        ]
+        obs_layers = [nn.Linear]
+        observer_layers_dict = {}
+        layers_dict = self.find_layers(self.model, layers=obs_layers)
+
+        ignore_layers = self.skip_layer_names()
+        for name, module in layers_dict.items():
+            if name.startswith(self.block_name) and name.split(".")[-1] in names:
+                observer_layers_dict[name] = module
+            else:
+                ignore_layers.append(name)
+        self.quant_config.quant_algo_info["ignore_layers"] = ignore_layers
+
+        if self.quant_config.custom_observe_layers_names != "default":
+            for custom_observe_name in self.quant_config.custom_observe_layers_names:
+                for default_name in observer_layers_dict.keys():
+                    if custom_observe_name not in default_name:
+                        observer_layers_dict.pop(default_name)
+        return observer_layers_dict
+
+    def get_smooth_mapping_layers(self, smooth_config, mappings=None):
+        if mappings is None:
+            mappings = [
+                (["q_proj", "k_proj", "v_proj"], "input_layernorm"),
+                (["gate_proj", "up_proj"], "post_attention_layernorm"),
+            ]
+        print(f"smooth mappings={mappings}")
+        assert len(mappings) == 2
+        assert smooth_config.smooth_first_linears or smooth_config.smooth_last_linears
+        # TODO: support smooth_last_linears
+        return super().get_smooth_mapping_layers(smooth_config, mappings)
+
+    def get_parent_dict(self, observer_layers_dict):
+        parent_mapping = {r"experts\.\d+": "experts"}
+        parent_dict = {}
+        for layer_name in observer_layers_dict.keys():
+            parent_name = layer_name
+            for k, v in parent_mapping.items():
+                parent_name = re.sub(k, v, layer_name)
+            if parent_name != layer_name:
+                parent_dict[layer_name] = parent_name
+        return parent_dict
+
+    def get_save_func(self):
+        if self.deploy_backend in ["vllm", "huggingface"]:
+            return PTQSaveVllmHF
+        else:
+            raise NotImplementedError(
+                f"deploy_backend {self.deploy_backend} is not supported for saving."
+            )
diff --git a/configs/seed_oss/fp8_dynamic/seed_oss-36b_instruct_fp8_dynamic.yaml b/configs/seed_oss/fp8_dynamic/seed_oss-36b_instruct_fp8_dynamic.yaml
@@ -0,0 +1,27 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output
+
+# Simplified Configuration for LLM compression
+model:
+  name: SeedOss
+  model_path: ByteDance-Seed/Seed-OSS-36B-Instruct
+  trust_remote_code: true
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: auto
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    name: fp8_dynamic
+    bits: 8
+    quant_method:
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
+      - "model.embed_tokens"
+
diff --git a/configs/seed_oss/fp8_static/seed_oss-36b_instruct_fp8_static.yaml b/configs/seed_oss/fp8_static/seed_oss-36b_instruct_fp8_static.yaml
@@ -0,0 +1,34 @@
+# Global configuration of pipeline
+global:
+  save_path: ./output
+
+# Simplified Configuration for LLM compression
+model:
+  name: SeedOss
+  model_path: ByteDance-Seed/Seed-OSS-36B-Instruct
+  trust_remote_code: true
+  low_cpu_mem_usage: true
+  use_cache: false
+  torch_dtype: auto
+  device_map: auto
+
+# Compression configuration
+compression:
+  name: PTQ
+  quantization:
+    name: fp8_static
+    bits: 8
+    quant_method:
+      weight: "per-tensor"
+      activation: "per-tensor"
+    ignore_layers:         # Skip quantization for these layers
+      - "lm_head"
+      - "model.embed_tokens"
+
+# Dataset for calibration
+dataset:
+  name: TextDataset
+  data_path: ./dataset/sharegpt_gpt4_qwen/sharegpt_gpt4-qwen3_a22B_output.jsonl
+  max_seq_length: 4096
+  num_samples: 256
+  batch_size: 1
diff --git a/docs/source/performance/quantization/benchmarks.md b/docs/source/performance/quantization/benchmarks.md
@@ -176,7 +176,7 @@ Qwen2.5VL系列模型的`BF16`、`FP8-Static`、`FP8-Dynamic`、`FP8-Static-ViT`
    +-------------------+------------------+----------+------------+--------------+
    | Model             | Quantization     | MMMU_VAL | DocVQA_VAL | ChartQA_TEST |
    +===================+==================+==========+============+==============+
-   | Qwen2.5VL-3B     | BF16             | 47.11    | 78.57      | 80.32        |
+   | Qwen2.5VL-3B      | BF16             | 47.11    | 78.57      | 80.32        |
    +                   +------------------+----------+------------+--------------+
    |                   | FP8-Static       | 47.33    | 79.34      | 79.68        | 
    +                   +------------------+----------+------------+--------------+
@@ -190,7 +190,7 @@ Qwen2.5VL系列模型的`BF16`、`FP8-Static`、`FP8-Dynamic`、`FP8-Static-ViT`
    +                   +------------------+----------+------------+--------------+
    |                   | INT4-AWQ         | 45.78    | -          | 79.60        | 
    +-------------------+------------------+----------+------------+--------------+
-   | Qwen2.5VL-7B     | BF16             | 45.44    | 89.71      | 84.64        |
+   | Qwen2.5VL-7B      | BF16             | 45.44    | 89.71      | 84.64        |
    |                   +------------------+----------+------------+--------------+
    |                   | FP8-Static       | 47.00    | 89.83      | 85.92        | 
    +                   +------------------+----------+------------+--------------+
@@ -204,7 +204,7 @@ Qwen2.5VL系列模型的`BF16`、`FP8-Static`、`FP8-Dynamic`、`FP8-Static-ViT`
    +                   +------------------+----------+------------+--------------+
    |                   | INT4-AWQ         | 45.67    | 89.28      | -            | 
    +-------------------+------------------+----------+------------+--------------+
-   | Qwen2.5VL-32B    | BF16             | 57.00    | 90.03      | -            |
+   | Qwen2.5VL-32B     | BF16             | 57.00    | 90.03      | -            |
    |                   +------------------+----------+------------+--------------+
    |                   | FP8-Static       | 57.00    | 89.88      | -            | 
    +                   +------------------+----------+------------+--------------+
@@ -218,7 +218,7 @@ Qwen2.5VL系列模型的`BF16`、`FP8-Static`、`FP8-Dynamic`、`FP8-Static-ViT`
    +                   +------------------+----------+------------+--------------+
    |                   | INT4-AWQ         | 55.22    | 90.30      | -            | 
    +-------------------+------------------+----------+------------+--------------+
-   | Qwen2.5VL-72B    | BF16             | 58.78    | 94.39      | 85.60        |
+   | Qwen2.5VL-72B     | BF16             | 58.78    | 94.39      | 85.60        |
    |                   +------------------+----------+------------+--------------+
    |                   | FP8-Static       | 57.89    | 94.41      | 85.84        | 
    +                   +------------------+----------+------------+--------------+
@@ -253,6 +253,29 @@ DeepSeek-R1-0528模型的`FP8-Block-Wise`、`W4A8-FP8`在`GPQA Diamond`、`AIME
    +-----------------------+----------------+--------------+-----------+----------+---------------+
 ```
 
+## Seed-OSS-36B-Instruct
+
+Seed-OSS-36B-Instruct模型的`BF16`、`FP8-Static`、`FP8-Dynamic`在`CEVAL`、`MMLU`、`GSM8K`、`HUMANEVAL`上的评测结果如下：
+
+```{eval-rst}
+.. table::
+   :align: center
+   :name: table-seed-oss-36b-performance
+
+   +-------------------------+----------------+---------+--------+----------------+------------------+-------------+
+   | Model                   | Quantization   | CEVAL   | MMLU   | GSM8K-strict   | GSM8K-flexible   | HUMANEVAL   |
+   +=========================+================+=========+========+================+==================+=============+
+   | Seed-OSS-36B-Instruct   | BF16           | 88.19   | 82.97  | 70.36          | 97.12            | 87.20       |
+   +                         +----------------+---------+--------+----------------+------------------+-------------+
+   |                         | FP8-Static     | 87.82   | 82.79  | 74.75          | 96.51            | 86.59       |
+   +                         +----------------+---------+--------+----------------+------------------+-------------+
+   |                         | FP8-Dynamic    | 87.82   | 82.64  | 74.15          | 96.89            | 87.20       |
+   +-------------------------+----------------+---------+--------+----------------+------------------+-------------+
+
+```
+
+该数据使用[lm-eval](https://github.com/EleutherAI/lm-evaluation-harness)工具评测。注意需要通过`--gen_kwargs max_gen_toks=<N>`设置足够大的最大生成长度，防止思考内容过长而被截断。
+
 
 ## 其他模型