Skip to content

Commit 88a3927

Browse files
ruixiang63, ggerganov, tnhnyzc, Dogacel, CISC
authored
spec: add EAGLE3 speculative decoding support (ggml-org#18039)
* llama : enable layer input extraction * spec: support eagle3 * eagle3: fix params bug * eagle3: support Gemma4 eagle3 from RedHatAI * eagle3: set sync when getting features from target Co-authored-by: tnhnyzc <115956684+tnhnyzc@users.noreply.github.com> * eagle3 : fix ubatch handling in embd_layer_inp extraction and encoder Co-authored-by: Doğaç Eldenk <dogacel@gmail.com> * eagle3: adapt to upstream changes * eagle3: fix rebase issues and adapt to upstream changes * eagle3: exclude the eagle3 arch from test-llama-archs * eagle3: fix editorconfig check failures * eagle3: fix multi-seq issue in d2t vocab mapping * cont : minor style / clean-up * spec : remove `common_speculative_setup_draft_model()` * llama : clean-up unused API * eagle3: set d2t vocab mapping in decode graph * cont : assert layer inputs are configured * hparams : use n_embd_inp instead of n_embd_target_features * eagle3: make output.weight optional and inherit from target model when needed * hparams : generic norm-before-residual param * llama-ext : consistent names * cont : fix * hparams : remove target_hidden_size * cparams : rename output_layer_inp -> embeddings_layer_inp * arch : reuse ATTN_NORM_2 instead of adding new hidden norm * llama : clean-up names * cont : add assert + comment * Update conversion/llama.py Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: tnhnyzc <115956684+tnhnyzc@users.noreply.github.com> Co-authored-by: Doğaç Eldenk <dogacel@gmail.com> Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
1 parent 85f99dc commit 88a3927

27 files changed

Lines changed: 1161 additions & 39 deletions

common/speculative.cpp

Lines changed: 418 additions & 10 deletions
Large diffs are not rendered by default.

conversion/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,9 @@
130130
"LlamaBidirectionalModel": "llama",
131131
"LlamaForCausalLM": "llama",
132132
"LlamaModel": "llama",
133+
"Eagle3DraftModel": "llama",
134+
"Eagle3Speculator": "llama",
135+
"LlamaForCausalLMEagle3": "llama",
133136
"LlavaForConditionalGeneration": "llama",
134137
"LlavaStableLMEpochForCausalLM": "stablelm",
135138
"MPTForCausalLM": "mpt",

conversion/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class ModelBase:
9494
metadata: gguf.Metadata
9595
dir_model_card: Path
9696
remote_hf_model_id: str | None
97+
target_model_dir: Path | None
9798

9899
# subclasses should define this!
99100
model_arch: gguf.MODEL_ARCH
@@ -119,6 +120,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
119120
small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None,
120121
disable_mistral_community_chat_template: bool = False,
121122
sentence_transformers_dense_modules: bool = False,
123+
target_model_dir: Path | None = None,
122124
fuse_gate_up_exps: bool = False,
123125
fp8_as_q8: bool = False):
124126
if type(self) is ModelBase or \
@@ -139,6 +141,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
139141
self.dry_run = dry_run
140142
self.remote_hf_model_id = remote_hf_model_id
141143
self.sentence_transformers_dense_modules = sentence_transformers_dense_modules
144+
self.target_model_dir = target_model_dir
142145
self.fuse_gate_up_exps = fuse_gate_up_exps
143146
self._gate_exp_buffer: dict[int, Tensor] = {}
144147
self._up_exp_buffer: dict[int, Tensor] = {}
@@ -2481,6 +2484,7 @@ class LazyTorchTensor(gguf.LazyBase):
24812484
torch.float16: np.float16,
24822485
torch.float32: np.float32,
24832486
torch.uint8: np.uint8,
2487+
torch.int64: np.int64,
24842488
}
24852489

24862490
# only used when byteswapping data. Only correct size is needed

conversion/llama.py

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@
55

66
from typing import Callable, Iterable, TYPE_CHECKING
77

8+
import numpy as np
89
import torch
910

1011
if TYPE_CHECKING:
1112
from torch import Tensor
1213

13-
from .base import ModelBase, TextModel, gguf
14+
from .base import ModelBase, TextModel, gguf, logger
1415

1516

1617
@ModelBase.register(
@@ -21,6 +22,9 @@
2122
"VLlama3ForCausalLM",
2223
"LlavaForConditionalGeneration",
2324
"VoxtralForConditionalGeneration",
25+
"LlamaForCausalLMEagle3",
26+
"Eagle3Speculator",
27+
"Eagle3DraftModel",
2428
"IQuestCoderForCausalLM",
2529
"LlamaModel")
2630
class LlamaModel(TextModel):
@@ -39,7 +43,61 @@ def __init__(self, *args, **kwargs):
3943
hparams = ModelBase.load_hparams(self.dir_model, is_mistral_format=False)
4044
self.origin_hf_arch = hparams.get('architectures', [None])[0]
4145

46+
# Detect eagle3 draft checkpoint by hparams (some models don't use a distinct HF arch name)
47+
if "draft_vocab_size" in self.hparams and self.hparams["num_hidden_layers"] == 1:
48+
self.is_eagle3 = True
49+
self.model_arch = gguf.MODEL_ARCH.EAGLE3
50+
logger.info("Detected EAGLE-3 draft model, switching to EAGLE3 architecture")
51+
# Re-initialize tensor_map with eagle3 architecture
52+
self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
53+
# Update gguf_writer architecture
54+
self.gguf_writer.arch = gguf.MODEL_ARCH_NAMES[self.model_arch]
55+
self.gguf_writer.add_architecture()
56+
if self.target_model_dir is None:
57+
raise ValueError(
58+
"EAGLE-3 model requires --target-model-dir to be specified. "
59+
"Please provide the path to the target model directory to read config.json"
60+
)
61+
# Read both eagle3 raw config and target model config
62+
with open(self.dir_model / "config.json", 'r', encoding='utf-8') as f:
63+
eagle3_raw_config = json.load(f)
64+
with open(self.target_model_dir / "config.json", 'r', encoding='utf-8') as f:
65+
target_config = json.load(f)
66+
67+
if "text_config" in target_config:
68+
target_config = {**target_config, **target_config["text_config"]}
69+
self.target_vocab_size = target_config["vocab_size"]
70+
71+
# target_layers: derived from target model layer count (low/mid/high)
72+
target_num_layers = target_config["num_hidden_layers"]
73+
target_layers = [2, target_num_layers // 2, target_num_layers - 3]
74+
logger.info(f"EAGLE-3: target_layers = {target_layers} (target model has {target_num_layers} layers)")
75+
self.gguf_writer.add_array(f"{self.gguf_writer.arch}.target_layers", target_layers)
76+
77+
# target_hidden_size: prefer eagle3 config, fallback to target config
78+
if eagle3_raw_config.get("target_hidden_size") is not None:
79+
target_hidden_size = eagle3_raw_config["target_hidden_size"]
80+
src = "EAGLE-3 config"
81+
else:
82+
target_hidden_size = target_config["hidden_size"]
83+
src = "target model config"
84+
logger.info(f"EAGLE-3: target_hidden_size = {target_hidden_size} (from {src})")
85+
self.gguf_writer.add_uint32(f"{self.gguf_writer.arch}.target_hidden_size", target_hidden_size)
86+
87+
# norm_before_residual (RedHat-style eagle3 specific)
88+
norm_before_residual = eagle3_raw_config.get("norm_before_residual", False)
89+
logger.info(f"EAGLE-3: norm_before_residual = {norm_before_residual}")
90+
self.gguf_writer.add_bool(f"{self.gguf_writer.arch}.norm_before_residual", norm_before_residual)
91+
4292
def set_vocab(self):
93+
# eagle3: use tokenizer from target model if provided
94+
original_dir_model = None
95+
if getattr(self, 'is_eagle3', False):
96+
assert self.target_model_dir is not None
97+
logger.info(f"EAGLE-3: Using tokenizer from target model: {self.target_model_dir}")
98+
original_dir_model = self.dir_model
99+
self.dir_model = self.target_model_dir
100+
43101
if self.origin_hf_arch == "GlmasrModel":
44102
return self._set_vocab_glmedge()
45103

@@ -85,6 +143,10 @@ def set_vocab(self):
85143
if self.hparams.get("vocab_size", 32000) == 49152:
86144
self.gguf_writer.add_add_bos_token(False)
87145

146+
# eagle3: Restore original dir_model
147+
if original_dir_model is not None:
148+
self.dir_model = original_dir_model
149+
88150
def set_gguf_parameters(self):
89151
super().set_gguf_parameters()
90152
hparams = self.hparams
@@ -129,7 +191,49 @@ def filter_tensors(cls, item: tuple[str, Callable[[], Tensor]]) -> tuple[str, Ca
129191

130192
return super().filter_tensors((name, gen))
131193

194+
def index_tensors(self, remote_hf_model_id: str | None = None) -> dict[str, Callable[[], Tensor]]:
195+
tensors = super().index_tensors(remote_hf_model_id)
196+
197+
# Handle Eagle3Speculator nested config
198+
if "transformer_layer_config" in self.hparams:
199+
self.hparams = {**self.hparams, **self.hparams["transformer_layer_config"]}
200+
201+
# eagle3 detection
202+
if "draft_vocab_size" in self.hparams and self.hparams["num_hidden_layers"] == 1:
203+
logger.info("EAGLE-3: renaming midlayer.* / layers.0.* to model.layers.0.*")
204+
new_tensors = {}
205+
for name, gen in tensors.items():
206+
if name.startswith("midlayer."):
207+
new_name = "model.layers.0." + name[len("midlayer."):]
208+
new_tensors[new_name] = gen
209+
elif name.startswith("layers.0."): # Eagle3Speculator format
210+
new_name = "model." + name
211+
new_tensors[new_name] = gen
212+
else:
213+
new_tensors[name] = gen
214+
return new_tensors
215+
216+
return tensors
217+
132218
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
219+
# eagle3: special tensors that bypass standard llama mapping
220+
if getattr(self, 'is_eagle3', False):
221+
if name == "fc.weight":
222+
yield (name, data_torch)
223+
return
224+
if name == "d2t":
225+
# store for manual int64 handling in prepare_tensors (avoid F32 conversion)
226+
if not hasattr(self, '_eagle3_int_tensors'):
227+
self._eagle3_int_tensors = {}
228+
self._eagle3_int_tensors[name] = data_torch
229+
return
230+
if name == "t2d":
231+
# not used at runtime, skip
232+
return
233+
if name.endswith(".hidden_norm.weight"):
234+
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_NORM_2, bid), data_torch)
235+
return
236+
133237
n_head = self.find_hparam(["n_heads", "num_attention_heads"])
134238
n_kv_head = self.find_hparam(["n_kv_heads", "num_key_value_heads"])
135239

@@ -205,8 +309,33 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
205309
yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
206310

207311
def prepare_tensors(self):
312+
# eagle3: collect d2t original dtype before parent converts tensors to F32
313+
eagle3_original_dtypes = {}
314+
if getattr(self, 'is_eagle3', False):
315+
for name, data_torch in self.get_tensors():
316+
if name == "d2t":
317+
eagle3_original_dtypes[name] = data_torch.dtype
318+
208319
super().prepare_tensors()
209320

321+
# eagle3: write d2t as absolute target token ids
322+
if getattr(self, 'is_eagle3', False) and hasattr(self, '_eagle3_int_tensors'):
323+
for name, data_torch in self._eagle3_int_tensors.items():
324+
old_dtype = eagle3_original_dtypes.get(name, data_torch.dtype)
325+
data = data_torch.to(torch.int64).cpu().numpy()
326+
if name == "d2t":
327+
data = data.reshape(-1)
328+
data = data + np.arange(data.size, dtype=np.int64)
329+
if np.any((data < 0) | (data >= self.target_vocab_size)):
330+
raise ValueError(f"EAGLE-3 d2t target ids out of range for target vocab size {self.target_vocab_size}")
331+
if np.unique(data).size != data.size:
332+
raise ValueError("EAGLE-3 d2t contains duplicate target ids")
333+
data_qtype = gguf.GGMLQuantizationType.I64
334+
335+
shape_str = f"{{{', '.join(str(n) for n in reversed(data.shape))}}}"
336+
logger.info(f"{name + ',':<30} {old_dtype} --> {data_qtype.name}, shape = {shape_str}")
337+
self.gguf_writer.add_tensor(name, data, raw_dtype=data_qtype)
338+
210339
if self._experts is not None:
211340
# flatten `list[dict[str, Tensor]]` into `list[str]`
212341
experts = [k for d in self._experts for k in d.keys()]

convert_hf_to_gguf.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,15 @@ def parse_args() -> argparse.Namespace:
153153
help="Store tensors dequantized from FP8 as Q8_0 instead of BF16/F16.",
154154
)
155155

156+
parser.add_argument(
157+
"--target-model-dir", type=str, default=None,
158+
help=(
159+
"path to the target model directory; required when converting a standalone draft model "
160+
"(e.g. EAGLE3 / DFlash) that needs target-model metadata such as tokenizer, hidden size, and "
161+
"layer count to populate its GGUF."
162+
),
163+
)
164+
156165
args = parser.parse_args()
157166
if not args.print_supported_models and args.model is None:
158167
parser.error("the following arguments are required: model")
@@ -269,6 +278,7 @@ def main() -> None:
269278
small_first_shard=args.no_tensor_first_split,
270279
remote_hf_model_id=hf_repo_id, disable_mistral_community_chat_template=disable_mistral_community_chat_template,
271280
sentence_transformers_dense_modules=args.sentence_transformers_dense_modules,
281+
target_model_dir=Path(args.target_model_dir) if args.target_model_dir else None,
272282
fuse_gate_up_exps=args.fuse_gate_up_exps,
273283
fp8_as_q8=args.fp8_as_q8,
274284
)

gguf-py/gguf/constants.py

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@ class LLM:
154154
HIDDEN_ACT = "{arch}.hidden_activation"
155155
DENSE_FEAT_IN_SIZE = "{arch}.{dense}_feat_in"
156156
DENSE_FEAT_OUT_SIZE = "{arch}.{dense}_feat_out"
157+
TARGET_LAYERS = "{arch}.target_layers"
158+
TARGET_HIDDEN_SIZE = "{arch}.target_hidden_size"
159+
NORM_BEFORE_RESIDUAL = "{arch}.norm_before_residual"
157160

158161
class Attention:
159162
HEAD_COUNT = "{arch}.attention.head_count"
@@ -511,6 +514,7 @@ class MODEL_ARCH(IntEnum):
511514
RND1 = auto()
512515
PANGU_EMBED = auto()
513516
MISTRAL3 = auto()
517+
EAGLE3 = auto()
514518
MISTRAL4 = auto()
515519
PADDLEOCR = auto()
516520
MIMO2 = auto()
@@ -901,14 +905,17 @@ class MODEL_TENSOR(IntEnum):
901905
A_PER_DIM_K_SCALE = auto() # gemma4
902906
A_PER_DIM_SCALE = auto() # gemma4
903907
# nextn/mtp
904-
NEXTN_PROJ_PRE = auto()
905-
NEXTN_PROJ_POST = auto()
906-
NEXTN_EH_PROJ = auto()
907-
NEXTN_EMBED_TOKENS = auto()
908-
NEXTN_ENORM = auto()
909-
NEXTN_HNORM = auto()
908+
NEXTN_PROJ_PRE = auto()
909+
NEXTN_PROJ_POST = auto()
910+
NEXTN_EH_PROJ = auto()
911+
NEXTN_EMBED_TOKENS = auto()
912+
NEXTN_ENORM = auto()
913+
NEXTN_HNORM = auto()
910914
NEXTN_SHARED_HEAD_HEAD = auto()
911915
NEXTN_SHARED_HEAD_NORM = auto()
916+
# eagle3
917+
FC = auto() # feature fusion layer
918+
D2T = auto() # draft to target vocabulary mapping
912919
# lfm2 audio
913920
A_ENC_NORM_CONV = auto()
914921
A_ENC_LINEAR_POS = auto()
@@ -1063,6 +1070,7 @@ class MODEL_TENSOR(IntEnum):
10631070
MODEL_ARCH.RND1: "rnd1",
10641071
MODEL_ARCH.PANGU_EMBED: "pangu-embedded",
10651072
MODEL_ARCH.MISTRAL3: "mistral3",
1073+
MODEL_ARCH.EAGLE3: "eagle3",
10661074
MODEL_ARCH.MISTRAL4: "mistral4",
10671075
MODEL_ARCH.PADDLEOCR: "paddleocr",
10681076
MODEL_ARCH.MIMO2: "mimo2",
@@ -1095,8 +1103,8 @@ class MODEL_TENSOR(IntEnum):
10951103
MODEL_TENSOR.POS_EMBD: "position_embd",
10961104
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
10971105
MODEL_TENSOR.OUTPUT: "output",
1098-
MODEL_TENSOR.DENSE_2_OUT: "dense_2", # embeddinggemma 2_Dense
1099-
MODEL_TENSOR.DENSE_3_OUT: "dense_3", # embeddinggemma 2_Dense
1106+
MODEL_TENSOR.DENSE_2_OUT: "dense_2", # embeddinggemma 2_Dense
1107+
MODEL_TENSOR.DENSE_3_OUT: "dense_3", # embeddinggemma 2_Dense
11001108
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
11011109
MODEL_TENSOR.ROPE_FACTORS_LONG: "rope_factors_long",
11021110
MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
@@ -1488,6 +1496,8 @@ class MODEL_TENSOR(IntEnum):
14881496
MODEL_TENSOR.NEXTN_HNORM: "blk.{bid}.nextn.hnorm",
14891497
MODEL_TENSOR.NEXTN_SHARED_HEAD_HEAD: "blk.{bid}.nextn.shared_head_head",
14901498
MODEL_TENSOR.NEXTN_SHARED_HEAD_NORM: "blk.{bid}.nextn.shared_head_norm",
1499+
MODEL_TENSOR.FC: "fc",
1500+
MODEL_TENSOR.D2T: "d2t",
14911501
}
14921502

14931503
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
@@ -4028,6 +4038,24 @@ class MODEL_TENSOR(IntEnum):
40284038
MODEL_TENSOR.FFN_DOWN_EXP,
40294039
MODEL_TENSOR.FFN_UP_EXP,
40304040
],
4041+
MODEL_ARCH.EAGLE3: [
4042+
MODEL_TENSOR.TOKEN_EMBD,
4043+
MODEL_TENSOR.OUTPUT_NORM,
4044+
MODEL_TENSOR.OUTPUT,
4045+
MODEL_TENSOR.ROPE_FREQS,
4046+
MODEL_TENSOR.ATTN_NORM,
4047+
MODEL_TENSOR.ATTN_NORM_2,
4048+
MODEL_TENSOR.ATTN_Q,
4049+
MODEL_TENSOR.ATTN_K,
4050+
MODEL_TENSOR.ATTN_V,
4051+
MODEL_TENSOR.ATTN_OUT,
4052+
MODEL_TENSOR.FFN_NORM,
4053+
MODEL_TENSOR.FFN_GATE,
4054+
MODEL_TENSOR.FFN_DOWN,
4055+
MODEL_TENSOR.FFN_UP,
4056+
MODEL_TENSOR.FC,
4057+
MODEL_TENSOR.D2T,
4058+
],
40314059
MODEL_ARCH.MISTRAL4: [
40324060
MODEL_TENSOR.TOKEN_EMBD,
40334061
MODEL_TENSOR.OUTPUT_NORM,

src/llama-arch.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#include "llama-impl.h"
44

55
#include <map>
6-
#include <set>
76
#include <vector>
87

98
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
@@ -128,6 +127,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
128127
{ LLM_ARCH_RND1, "rnd1" },
129128
{ LLM_ARCH_PANGU_EMBED, "pangu-embedded" },
130129
{ LLM_ARCH_MISTRAL3, "mistral3" },
130+
{ LLM_ARCH_EAGLE3, "eagle3" },
131131
{ LLM_ARCH_MISTRAL4, "mistral4" },
132132
{ LLM_ARCH_PADDLEOCR, "paddleocr" },
133133
{ LLM_ARCH_MIMO2, "mimo2" },
@@ -292,12 +292,16 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
292292

293293
{ LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
294294

295+
{ LLM_KV_TARGET_LAYERS, "%s.target_layers" },
296+
{ LLM_KV_TARGET_HIDDEN_SIZE, "%s.target_hidden_size" },
297+
{ LLM_KV_NORM_BEFORE_RESIDUAL, "%s.norm_before_residual" },
298+
295299
{ LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" },
296300
// sentence-transformers dense modules feature dims
297301
{ LLM_KV_DENSE_2_FEAT_IN, "%s.dense_2_feat_in" },
298-
{ LLM_KV_DENSE_2_FEAT_OUT, "%s.dense_2_feat_out" },
299-
{ LLM_KV_DENSE_3_FEAT_IN, "%s.dense_3_feat_in" },
300-
{ LLM_KV_DENSE_3_FEAT_OUT, "%s.dense_3_feat_out" },
302+
{ LLM_KV_DENSE_2_FEAT_OUT, "%s.dense_2_feat_out" },
303+
{ LLM_KV_DENSE_3_FEAT_IN, "%s.dense_3_feat_in" },
304+
{ LLM_KV_DENSE_3_FEAT_OUT, "%s.dense_3_feat_out" },
301305

302306
{ LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
303307
{ LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
@@ -562,6 +566,8 @@ static const std::map<llm_tensor, const char *> LLM_TENSOR_NAMES = {
562566
{ LLM_TENSOR_INDEXER_ATTN_Q_B, "blk.%d.indexer.attn_q_b" },
563567
{ LLM_TENSOR_MASKED_EMBD_CENTROIDS, "masked_embd_centroids" },
564568
{ LLM_TENSOR_MASKED_EMBD_ORDERING, "masked_embd_ordering" },
569+
{ LLM_TENSOR_FC, "fc" },
570+
{ LLM_TENSOR_D2T, "d2t" },
565571
};
566572

567573
// declare information about the model weight tensors:
@@ -788,6 +794,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
788794
{LLM_TENSOR_FFN_LATENT_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
789795
{LLM_TENSOR_MASKED_EMBD_CENTROIDS, {LLM_TENSOR_LAYER_INPUT, GGML_OP_NONE}},
790796
{LLM_TENSOR_MASKED_EMBD_ORDERING, {LLM_TENSOR_LAYER_INPUT, GGML_OP_NONE}},
797+
// eagle3
798+
{LLM_TENSOR_FC, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
799+
{LLM_TENSOR_D2T, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
791800
};
792801

793802
LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {}

0 commit comments

Comments
 (0)