From 73b87848cd716a3107c4357d7be055b794fd31b0 Mon Sep 17 00:00:00 2001 From: liusong1222 Date: Wed, 10 Sep 2025 12:50:36 +0800 Subject: [PATCH 1/2] adjust head_dim fetching logic from config in llama attention module --- .../models/eagle3/draft/llama3_eagle3.py | 19 ++++--------------- requirements.txt | 4 +++- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/angelslim/compressor/speculative/inference/models/eagle3/draft/llama3_eagle3.py b/angelslim/compressor/speculative/inference/models/eagle3/draft/llama3_eagle3.py index e77c79ec..2d513619 100644 --- a/angelslim/compressor/speculative/inference/models/eagle3/draft/llama3_eagle3.py +++ b/angelslim/compressor/speculative/inference/models/eagle3/draft/llama3_eagle3.py @@ -240,17 +240,13 @@ def __init__(self, config): self.config = config self.hidden_size = config.hidden_size self.num_heads = config.num_attention_heads - self.head_dim = self.hidden_size // self.num_heads + self.head_dim = getattr( + config, "head_dim", config.hidden_size // config.num_attention_heads + ) self.num_key_value_heads = config.num_key_value_heads self.num_key_value_groups = self.num_heads // self.num_key_value_heads self.max_position_embeddings = config.max_position_embeddings - if (self.head_dim * self.num_heads) != self.hidden_size: - raise ValueError( - f"hidden_size must be divisible by num_heads " - f"(got `hidden_size`: {self.hidden_size}" - f" and `num_heads`: {self.num_heads})." 
- ) self.q_proj = nn.Linear( self.hidden_size * 2, self.num_heads * self.head_dim, bias=False ) @@ -400,15 +396,8 @@ def forward( ).to(query_states.dtype) attn_output = torch.matmul(attn_weights, value_states) - if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): - raise ValueError( - f"`attn_output` should be of size " - f"{(bsz, self.num_heads, q_len, self.head_dim)}, but is" - f" {attn_output.size()}" - ) - attn_output = attn_output.transpose(1, 2).contiguous() - attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) + attn_output = attn_output.reshape(bsz, q_len, -1) if self.config.pretraining_tp > 1: attn_output = attn_output.split( diff --git a/requirements.txt b/requirements.txt index 90b2faa0..e44a3037 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,6 @@ datasets fschat openai anthropic -ray \ No newline at end of file +ray +referencing +jsonschema_specifications \ No newline at end of file From 1df6263eae7cc57dc442dd7e975749e8d98df4b7 Mon Sep 17 00:00:00 2001 From: liusong1222 Date: Wed, 10 Sep 2025 13:05:31 +0800 Subject: [PATCH 2/2] fix hunyuan eagle3 benchmark format --- .../speculative_decoding/benchmarks.md | 35 ++++++------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/docs/source/performance/speculative_decoding/benchmarks.md b/docs/source/performance/speculative_decoding/benchmarks.md index 63cd82c1..f73c2a4f 100644 --- a/docs/source/performance/speculative_decoding/benchmarks.md +++ b/docs/source/performance/speculative_decoding/benchmarks.md @@ -23,28 +23,13 @@ ### Hunyuan Series Models - - - - - - - - - - - - - - - - - - - - - - - -
  MT-benchHumanEvalGSM8KAlpacaMean
TemperatureModelSpeedupτSpeedupτSpeedupτSpeedupτSpeedupτ
Temperature=0Hunyuan-1.8B-Instruct1.97x2.902.58x3.732.61x3.711.71x2.432.22x3.19
Hunyuan-4B-Instruct1.77x2.602.64x3.352.14x3.171.72x2.572.07x2.92
Hunyuan-7B-Instruct2.22x3.583.59x5.472.96x4.681.64x2.562.60x4.07
Temperature=1Hunyuan-1.8B-Instruct1.58x2.362.35x3.562.23x3.381.26x1.871.86x2.79
Hunyuan-4B-Instruct1.36x2.051.97x2.861.72x2.681.14x1.761.55x2.34
Hunyuan-7B-Instruct1.90x3.113.12x5.092.74x4.341.47x2.392.31x3.73
- \ No newline at end of file +| | | MT-bench | | HumanEval | | GSM8K | | Alpaca | | Mean | | +|------------------|--------------|------------------|------------|-------------------|-------------|----------------|---------|----------------|----------|---------------|--------| +| | Model | Speedup | τ | Speedup | τ | Speedup | τ | Speedup | τ | Speedup | τ | +| | Hunyuan-1.8B | 1.97x | 2.90 | 2.58x | 3.73 | 2.61x | 3.71 | 1.71x | 2.43 | 2.22x | 3.19 | +| **Temperature=0**| Hunyuan-4B | 1.77x | 2.60 | 2.64x | 3.35 | 2.14x | 3.17 | 1.72x | 2.57 | 2.07x | 2.92 | +| | Hunyuan-7B | 2.22x | 3.58 | 3.59x | 5.47 | 2.96x | 4.68 | 1.64x | 2.56 | 2.60x | 4.07 | +| | | | | | | | | | | | | +| | Hunyuan-1.8B | 1.58x | 2.36 | 2.35x | 3.56 | 2.23x | 3.38 | 1.26x | 1.87 | 1.86x | 2.79 | +| **Temperature=1**| Hunyuan-4B | 1.36x | 2.05 | 1.97x | 2.86 | 1.72x | 2.68 | 1.14x | 1.76 | 1.55x | 2.34 | +| | Hunyuan-7B | 1.90x | 3.11 | 3.12x | 5.09 | 2.74x | 4.34 | 1.47x | 2.39 | 2.31x | 3.73 |