Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ The following is the list of models supported by MCore-Bridge:
| Ovis | ovis2_5 |
| Llama | llama, llama4 |
| GPT-OSS | gpt_oss |
| Hunyuan | hy_v3 |
| ERNIE | ernie4_5, ernie4_5_moe |
| MiMo | mimo |
| Dots | dots1 |
Expand Down
1 change: 1 addition & 0 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ uv pip install -e . --torch-backend=auto
| Ovis | ovis2_5 |
| Llama | llama, llama4 |
| GPT-OSS | gpt_oss |
| Hunyuan | hy_v3 |
| ERNIE | ernie4_5, ernie4_5_moe |
| MiMo | mimo |
| Dots | dots1 |
Expand Down
4 changes: 2 additions & 2 deletions src/mcore_bridge/config/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
'qk_pos_emb_head_dim': ['qk_rope_head_dim'],
'v_head_dim': ['v_head_dim'],
'moe_router_topk_scaling_factor': ['routed_scaling_factor'],
'qk_layernorm': ['use_qk_norm'],
'qk_layernorm': ['use_qk_norm', 'qk_norm'],
# qwen3_next/qwen3_5
'linear_attention_freq': ['full_attention_interval'],
'linear_num_key_heads': ['linear_num_key_heads'],
Expand All @@ -56,7 +56,7 @@
'original_max_position_embeddings': ['original_max_position_embeddings'],
'partial_rotary_factor': ['partial_rotary_factor'],
'first_k_dense_replace': ['first_k_dense_replace', 'moe_layer_start_index'],
'n_shared_experts': ['n_shared_experts', 'num_shared_expert', 'moe_num_shared_experts'],
'n_shared_experts': ['n_shared_experts', 'num_shared_expert', 'moe_num_shared_experts', 'num_shared_experts'],
'window_size': ['sliding_window'],
'layer_types': ['layer_types'],
'interleave_moe_layer_step': ['interleave_moe_layer_step'],
Expand Down
1 change: 1 addition & 0 deletions src/mcore_bridge/model/constant.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class LLMModelType:
olmoe = 'olmoe'
glm4 = 'glm4'
minimax_m2 = 'minimax_m2'
hy_v3 = 'hy_v3'

qwen3_emb = 'qwen3_emb'

Expand Down
2 changes: 1 addition & 1 deletion src/mcore_bridge/model/gpts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Copyright (c) ModelScope Contributors. All rights reserved.
from . import glm4, llm, minimax_m2, olmoe, qwen3_emb, qwen3_next
from . import glm4, hunyuan, llm, minimax_m2, olmoe, qwen3_emb, qwen3_next
19 changes: 19 additions & 0 deletions src/mcore_bridge/model/gpts/hunyuan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright (c) ModelScope Contributors. All rights reserved.

from mcore_bridge.bridge import GPTBridge

from ..constant import ModelType
from ..register import ModelMeta, register_model


class HyV3Bridge(GPTBridge):
hf_gate_key = 'router.gate.weight'
hf_expert_bias_key = 'expert_bias'
Comment thread
Jintao-Huang marked this conversation as resolved.
hf_shared_expert_key = 'shared_mlp'


register_model(ModelMeta(
ModelType.hy_v3,
['hy_v3'],
Comment thread
Jintao-Huang marked this conversation as resolved.
bridge_cls=HyV3Bridge,
))
2 changes: 0 additions & 2 deletions src/mcore_bridge/model/gpts/llm.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# Copyright (c) ModelScope Contributors. All rights reserved.
from mcore_bridge.bridge import GPTBridge

from ..constant import ModelType
from ..register import ModelMeta, register_model

Expand Down
Loading