Skip to content

Commit 01cc81b

Browse files
committed
fix conflict
2 parents e4498c9 + 61f206a commit 01cc81b

45 files changed

Lines changed: 2279 additions & 114 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -636,12 +636,12 @@ Benchmark results for Qwen3-VL series models using Eagle3 speculative decoding o
636636

637637
##### 1.2.2 HunyuanOCR Model
638638

639-
Benchmark results for HunyuanOCR using Eagle3 speculative decoding on vLLM (v0.13.0) across OCR tasks, using a single NVIDIA H20 GPU (**tp=1, ep=1, num_speculative_tokens=4, batch_size=1, output_len=1024**).
639+
Benchmark results for HunyuanOCR using Eagle3 speculative decoding on vLLM (v0.13.0), evaluated on the **[OmniDocBench](https://huggingface.co/datasets/opendatalab/OmniDocBench)** dataset using a single NVIDIA H20 GPU (**tp=1, ep=1, num_speculative_tokens=4, batch_size=1, output_len=1024**).
640640
<table><thead>
641641
<tr>
642642
<th>Model</th>
643643
<th>Method</th>
644-
<th colspan="2">OCR-Bench-Internal</th>
644+
<th colspan="2">OmniDocBench</th>
645645
</tr></thead>
646646
<tbody>
647647
<tr>
@@ -653,13 +653,13 @@ Benchmark results for HunyuanOCR using Eagle3 speculative decoding on vLLM (v0.1
653653
<tr>
654654
<td rowspan="2">Hunyuan-OCR</td>
655655
<td>Vanilla</td>
656-
<td>71.21</td>
656+
<td>70.12</td>
657657
<td>1</td>
658658
</tr>
659659
<tr>
660660
<td>Eagle3</td>
661-
<td>120.75</td>
662-
<td>2.2</td>
661+
<td>108.1</td>
662+
<td>2.08</td>
663663
</tr>
664664
</tbody>
665665
</table>

README_cn.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -640,13 +640,13 @@ bash scripts/deploy/lm_eval.sh -d 0,1 -t 2 -g 0.8 -r $RESULT_PATH -b "auto" --ta
640640

641641
##### 1.2.2 HunyuanOCR模型
642642

643-
我们使用(v0.13.0)评测了HunyuanOCR Eagle3模型在 **OCR-Bench** 上的接收长度和吞吐。结果是在单张H20上用以下设置测得:**tp=1, ep=1, num_speculative_tokens=4, batch_size=1, output_len=1024**
643+
我们使用vLLM(v0.13.0)评测了HunyuanOCR Eagle3模型在[OmniDocBench](https://huggingface.co/datasets/opendatalab/OmniDocBench)上的接收长度和吞吐。结果是在单张H20上用以下设置测得:**tp=1, ep=1, num_speculative_tokens=4, batch_size=1, output_len=1024**
644644

645645
<table><thead>
646646
<tr>
647647
<th>Model</th>
648648
<th>Method</th>
649-
<th colspan="2">OCR-Bench-Internal</th>
649+
<th colspan="2">OmniDocBench</th>
650650
</tr></thead>
651651
<tbody>
652652
<tr>
@@ -658,13 +658,13 @@ bash scripts/deploy/lm_eval.sh -d 0,1 -t 2 -g 0.8 -r $RESULT_PATH -b "auto" --ta
658658
<tr>
659659
<td rowspan="2">Hunyuan-OCR</td>
660660
<td>Vanilla</td>
661-
<td>71.21</td>
661+
<td>70.12</td>
662662
<td>1</td>
663663
</tr>
664664
<tr>
665665
<td>Eagle3</td>
666-
<td>120.75</td>
667-
<td>2.2</td>
666+
<td>108.1</td>
667+
<td>2.08</td>
668668
</tr>
669669
</tbody>
670670
</table>
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
{
2+
"architectures": [
3+
"Eagle3LlamaForCausalLM"
4+
],
5+
"model_type": "llama",
6+
"target_model_type": "hunyuan_vl",
7+
"attention_bias": false,
8+
"attention_dropout": 0.0,
9+
"bos_token_id": 120000,
10+
"eod_token_id": 120020,
11+
"eos_token_id": 120020,
12+
"dtype": "bfloat16",
13+
"head_dim": 128,
14+
"hidden_act": "silu",
15+
"hidden_size": 1024,
16+
"image_start_token_id": 120118,
17+
"image_end_token_id": 120119,
18+
"image_token_id": 120120,
19+
"image_newline_token_id": 120121,
20+
"initializer_range": 0.02,
21+
"intermediate_size": 3584,
22+
"max_position_embeddings": 32768,
23+
"num_attention_heads": 16,
24+
"num_hidden_layers": 1,
25+
"num_key_value_heads": 8,
26+
"rms_norm_eps": 1e-06,
27+
"rope_theta": 10000.0,
28+
"use_cache": true,
29+
"vocab_size": 120818,
30+
"tie_word_embeddings": true,
31+
"transformers_version": "4.57.1",
32+
"draft_vocab_size": 32000,
33+
"modal_type": "VLM"
34+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
{
2+
"architectures": [
3+
"Eagle3LlamaForCausalLM"
4+
],
5+
"model_type": "llama",
6+
"target_model_type": "qwen2_audio",
7+
"attention_bias": false,
8+
"attention_dropout": 0.0,
9+
"bos_token_id": 151643,
10+
"dtype": "bfloat16",
11+
"eos_token_id": 151645,
12+
"head_dim": 128,
13+
"hidden_act": "silu",
14+
"hidden_size": 4096,
15+
"initializer_range": 0.02,
16+
"intermediate_size": 11008,
17+
"max_position_embeddings": 8192,
18+
"num_attention_heads": 32,
19+
"num_hidden_layers": 1,
20+
"num_key_value_heads": 4,
21+
"rms_norm_eps": 1e-06,
22+
"rope_scaling": null,
23+
"rope_theta": 10000,
24+
"use_cache": true,
25+
"vocab_size": 156032,
26+
"tie_word_embeddings": false,
27+
"transformers_version": "4.57.1",
28+
"draft_vocab_size": 32000,
29+
"modal_type": "Audio"
30+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"architectures": [
3+
"Eagle3LlamaForCausalLM"
4+
],
5+
"model_type": "llama",
6+
"target_model_type": "qwen3_vl",
7+
"attention_bias": false,
8+
"attention_dropout": 0.0,
9+
"bos_token_id": 151643,
10+
"dtype": "bfloat16",
11+
"eos_token_id": 151645,
12+
"head_dim": 128,
13+
"hidden_act": "silu",
14+
"hidden_size": 2048,
15+
"initializer_range": 0.02,
16+
"intermediate_size": 6144,
17+
"max_position_embeddings": 262144,
18+
"num_attention_heads": 16,
19+
"num_hidden_layers": 1,
20+
"num_key_value_heads": 8,
21+
"rms_norm_eps": 1e-06,
22+
"rope_scaling": {
23+
"type": "default",
24+
"rope_type": "default",
25+
"mrope_interleaved": true,
26+
"mrope_section": [
27+
24,
28+
20,
29+
20
30+
]
31+
},
32+
"rope_theta": 5000000,
33+
"use_cache": true,
34+
"vocab_size": 151936,
35+
"tie_word_embeddings": true,
36+
"transformers_version": "4.57.1",
37+
"image_token_id": 151655,
38+
"video_token_id": 151656,
39+
"vision_end_token_id": 151653,
40+
"vision_start_token_id": 151652,
41+
"draft_vocab_size": 32000,
42+
"modal_type": "VLM"
43+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
{
2+
"architectures": [
3+
"Eagle3LlamaForCausalLM"
4+
],
5+
"model_type": "llama",
6+
"target_model_type": "qwen3_vl",
7+
"attention_bias": false,
8+
"attention_dropout": 0.0,
9+
"bos_token_id": 151643,
10+
"dtype": "bfloat16",
11+
"eos_token_id": 151645,
12+
"head_dim": 128,
13+
"hidden_act": "silu",
14+
"hidden_size": 2048,
15+
"initializer_range": 0.02,
16+
"intermediate_size": 6144,
17+
"max_position_embeddings": 262144,
18+
"num_attention_heads": 32,
19+
"num_hidden_layers": 1,
20+
"num_key_value_heads": 4,
21+
"rms_norm_eps": 1e-06,
22+
"rope_scaling": {
23+
"type": "default",
24+
"rope_type": "default",
25+
"mrope_interleaved": true,
26+
"mrope_section": [
27+
24,
28+
20,
29+
20
30+
]
31+
},
32+
"rope_theta": 5000000,
33+
"use_cache": true,
34+
"vocab_size": 151936,
35+
"tie_word_embeddings": false,
36+
"transformers_version": "4.57.1",
37+
"image_token_id": 151655,
38+
"video_token_id": 151656,
39+
"vision_end_token_id": 151653,
40+
"vision_start_token_id": 151652,
41+
"draft_vocab_size": 32000,
42+
"modal_type": "VLM"
43+
}

angelslim/compressor/speculative/train/configs/qwen3-vl-4b-eagle3-mrope.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"Eagle3LlamaForCausalLM"
44
],
55
"model_type": "llama",
6+
"target_model_type": "qwen3_vl",
67
"attention_bias": false,
78
"attention_dropout": 0.0,
89
"bos_token_id": 151643,

angelslim/compressor/speculative/train/data/chat_templates.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,22 @@
2626
class ChatTemplateType(Enum):
2727
"""Supported chat template types."""
2828

29+
QWEN2_AUDIO = "qwen2_audio"
2930
QWEN3 = "qwen3"
3031
HUNYUAN = "hunyuan"
3132
QWEN3_VL = "qwen3_vl"
3233
HUNYUAN_7B = "hunyuan_7b"
34+
HUNYUAN_VL = "hunyuan_vl"
3335

3436

3537
# String to ChatTemplateType mapping
3638
CHAT_TEMPLATE_TYPE_MAPPING = {
39+
"qwen2_audio": ChatTemplateType.QWEN2_AUDIO,
3740
"qwen3": ChatTemplateType.QWEN3,
3841
"hunyuan": ChatTemplateType.HUNYUAN,
3942
"hunyuan_7b": ChatTemplateType.HUNYUAN_7B,
4043
"qwen3_vl": ChatTemplateType.QWEN3_VL,
44+
"hunyuan_vl": ChatTemplateType.HUNYUAN_VL,
4145
}
4246

4347

@@ -133,6 +137,26 @@ def _initialize_templates(self) -> Dict[ChatTemplateType, ChatTemplate]:
133137
}
134138
],
135139
),
140+
ChatTemplateType.QWEN2_AUDIO: ChatTemplate(
141+
user_header="<|im_start|>user\n",
142+
assistant_header="<|im_start|>assistant\n",
143+
system_prompt=[
144+
{
145+
"type": "text",
146+
"text": ("You are a helpful assistant."),
147+
}
148+
],
149+
),
150+
ChatTemplateType.HUNYUAN_VL: ChatTemplate(
151+
user_header="<|hy_Assistant|>",
152+
assistant_header="<|hy_User|>",
153+
system_prompt=[
154+
{
155+
"type": "text",
156+
"text": "",
157+
}
158+
],
159+
),
136160
}
137161

138162
def get_template(self, chat_template_type: ChatTemplateType) -> ChatTemplate:

0 commit comments

Comments
 (0)