Commit 2bf3698

Better detection of Qwen-style LLM models
1 parent 653125e commit 2bf3698

1 file changed: src/model_detection.cpp (52 additions, 9 deletions)

@@ -81,20 +81,16 @@ std::string inferModelTypeFromTensorKeys(const std::vector<std::string>& tensor_
         return "vae"; // Default to VAE if we can't determine
     }
 
-    // Check for LLM model indicators
-    for (const std::string& name : tensor_keys) {
-        if (name.find("blk.35.attn_k.weight") != std::string::npos ||
-            name.find("model.layers.35.post_attention_layernorm.weight") != std::string::npos) {
-            LOG_DEBUG("Detected LLM model");
-            return "llm";
-        }
-    }
-
     bool has_text_model = false;
     bool has_text_projection = false;
     bool has_position_ids = false;
     bool has_self_attention = false;
     bool has_dense_relu_dense = false;
+    bool has_llm_token_embedding = false;
+    bool has_llm_attention = false;
+    bool has_llm_mlp = false;
+    bool has_llm_output_norm = false;
+    bool has_llm_qk_norm = false;
 
     // Count transformer layers to distinguish CLIP-L (12 layers) from CLIP-G (32 layers)
     int max_layer_number = -1;
@@ -124,6 +120,44 @@ std::string inferModelTypeFromTensorKeys(const std::vector<std::string>& tensor_
             has_dense_relu_dense = true;
         }
 
+        // LLM model indicators. Support both raw GGUF naming and converted safetensors naming.
+        if (name_lower.find("token_embd.weight") != std::string::npos ||
+            name_lower.find("embed_tokens.weight") != std::string::npos) {
+            has_llm_token_embedding = true;
+        }
+        if ((name_lower.find("blk.") != std::string::npos &&
+             (name_lower.find("attn_q.weight") != std::string::npos ||
+              name_lower.find("attn_k.weight") != std::string::npos ||
+              name_lower.find("attn_v.weight") != std::string::npos ||
+              name_lower.find("attn_output.weight") != std::string::npos)) ||
+            (name_lower.find("model.layers.") != std::string::npos &&
+             (name_lower.find("self_attn.q_proj.weight") != std::string::npos ||
+              name_lower.find("self_attn.k_proj.weight") != std::string::npos ||
+              name_lower.find("self_attn.v_proj.weight") != std::string::npos ||
+              name_lower.find("self_attn.o_proj.weight") != std::string::npos))) {
+            has_llm_attention = true;
+        }
+        if ((name_lower.find("blk.") != std::string::npos &&
+             (name_lower.find("ffn_gate.weight") != std::string::npos ||
+              name_lower.find("ffn_up.weight") != std::string::npos ||
+              name_lower.find("ffn_down.weight") != std::string::npos)) ||
+            (name_lower.find("model.layers.") != std::string::npos &&
+             (name_lower.find("mlp.gate_proj.weight") != std::string::npos ||
+              name_lower.find("mlp.up_proj.weight") != std::string::npos ||
+              name_lower.find("mlp.down_proj.weight") != std::string::npos))) {
+            has_llm_mlp = true;
+        }
+        if (name_lower.find("output_norm.weight") != std::string::npos ||
+            name_lower.find("model.norm.weight") != std::string::npos) {
+            has_llm_output_norm = true;
+        }
+        if (name_lower.find("attn_q_norm.weight") != std::string::npos ||
+            name_lower.find("attn_k_norm.weight") != std::string::npos ||
+            name_lower.find("self_attn.q_norm.weight") != std::string::npos ||
+            name_lower.find("self_attn.k_norm.weight") != std::string::npos) {
+            has_llm_qk_norm = true;
+        }
+
         // Extract layer numbers from tensor names
         // Look for patterns like "layers.11", "layer.31", "blocks.5", etc.
         if (name_lower.find("layer") != std::string::npos || name_lower.find("block") != std::string::npos) {
@@ -152,6 +186,15 @@ std::string inferModelTypeFromTensorKeys(const std::vector<std::string>& tensor_
         return "t5xxl";
     }
 
+    // Qwen3 and similar LLMs expose a transformer block structure with token embeddings,
+    // attention projections, MLP projections, and a final output norm.
+    if ((has_llm_token_embedding && has_llm_attention && has_llm_mlp) ||
+        (has_llm_attention && has_llm_mlp && has_llm_output_norm) ||
+        (has_llm_attention && has_llm_mlp && has_llm_qk_norm)) {
+        LOG_DEBUG("Detected LLM model");
+        return "llm";
+    }
+
     // If it's a CLIP model (has text model indicators)
     if (has_text_model || has_text_projection || has_position_ids) {
         // Distinguish between CLIP-L and CLIP-G based on layer count
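For reference, below is a minimal, self-contained sketch of the new heuristic's LLM branch. The helper names (contains, looksLikeLlm) and the sample tensor keys are illustrative assumptions, not code from this repository; the GGUF-style keys follow llama.cpp naming conventions and the safetensors-style keys follow Hugging Face Qwen3 checkpoints. The real inferModelTypeFromTensorKeys() additionally distinguishes VAE, CLIP-L/G, and T5 variants.

// Sketch of the LLM-detection branch added by this commit, simplified to a
// standalone function. Sample keys are illustrative, not taken from a real file.
#include <algorithm>
#include <cctype>
#include <iostream>
#include <string>
#include <vector>

static bool contains(const std::string& haystack, const std::string& needle) {
    return haystack.find(needle) != std::string::npos;
}

static bool looksLikeLlm(const std::vector<std::string>& tensor_keys) {
    bool has_embedding = false, has_attention = false, has_mlp = false;
    bool has_output_norm = false, has_qk_norm = false;

    for (std::string name : tensor_keys) {  // copy: we lowercase in place
        std::transform(name.begin(), name.end(), name.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

        // Token embedding: GGUF ("token_embd") or safetensors ("embed_tokens").
        if (contains(name, "token_embd.weight") || contains(name, "embed_tokens.weight"))
            has_embedding = true;

        // Attention projections under a per-layer prefix, either naming scheme.
        if ((contains(name, "blk.") &&
             (contains(name, "attn_q.weight") || contains(name, "attn_k.weight") ||
              contains(name, "attn_v.weight") || contains(name, "attn_output.weight"))) ||
            (contains(name, "model.layers.") &&
             (contains(name, "self_attn.q_proj.weight") || contains(name, "self_attn.k_proj.weight") ||
              contains(name, "self_attn.v_proj.weight") || contains(name, "self_attn.o_proj.weight"))))
            has_attention = true;

        // Gated MLP projections, either naming scheme.
        if ((contains(name, "blk.") &&
             (contains(name, "ffn_gate.weight") || contains(name, "ffn_up.weight") ||
              contains(name, "ffn_down.weight"))) ||
            (contains(name, "model.layers.") &&
             (contains(name, "mlp.gate_proj.weight") || contains(name, "mlp.up_proj.weight") ||
              contains(name, "mlp.down_proj.weight"))))
            has_mlp = true;

        // Final output norm and Qwen3-style per-head QK norms.
        if (contains(name, "output_norm.weight") || contains(name, "model.norm.weight"))
            has_output_norm = true;
        if (contains(name, "attn_q_norm.weight") || contains(name, "attn_k_norm.weight") ||
            contains(name, "self_attn.q_norm.weight") || contains(name, "self_attn.k_norm.weight"))
            has_qk_norm = true;
    }

    // Same decision rule as the commit: attention + MLP plus one corroborating signal.
    return (has_embedding && has_attention && has_mlp) ||
           (has_attention && has_mlp && has_output_norm) ||
           (has_attention && has_mlp && has_qk_norm);
}

int main() {
    // GGUF-style keys, as emitted by llama.cpp-style converters (illustrative).
    const std::vector<std::string> gguf_keys = {
        "token_embd.weight", "blk.0.attn_q.weight", "blk.0.attn_k.weight",
        "blk.0.attn_v.weight", "blk.0.attn_output.weight", "blk.0.ffn_gate.weight",
        "blk.0.ffn_up.weight", "blk.0.ffn_down.weight", "output_norm.weight"};

    // Safetensors-style keys, as in Hugging Face Qwen3 checkpoints (illustrative).
    const std::vector<std::string> hf_keys = {
        "model.embed_tokens.weight", "model.layers.0.self_attn.q_proj.weight",
        "model.layers.0.self_attn.q_norm.weight", "model.layers.0.mlp.gate_proj.weight",
        "model.layers.0.mlp.up_proj.weight", "model.layers.0.mlp.down_proj.weight",
        "model.norm.weight"};

    std::cout << std::boolalpha
              << "gguf: " << looksLikeLlm(gguf_keys) << "\n"   // true
              << "hf:   " << looksLikeLlm(hf_keys) << "\n";    // true
}

Requiring attention and MLP projections together with at least one corroborating signal (token embedding, output norm, or QK norm) replaces the old check, which matched only two hard-coded layer-35 tensor names and therefore could only detect models with at least 36 layers.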
