Skip to content

Commit f617388

Browse files
committed
bugfix: Hybrid Model warning and cleancode
1 parent 63bc3f7 commit f617388

1 file changed

Lines changed: 77 additions & 28 deletions

File tree

docs/source/_static/calculator.js

Lines changed: 77 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -126,18 +126,18 @@ const formulaData = {
126126
// Formula Display Functions
127127
// ============================================================
128128

129-
function getFormulaInfo(archType) {
129+
function getFormulaInfo(modelArch) {
130130
let archKey = 'Standard';
131131

132-
if (archType.isDSA) {
132+
if (modelArch.isDSA) {
133133
archKey = 'DSA';
134-
} else if (archType.isMLA) {
134+
} else if (modelArch.isMLA) {
135135
archKey = 'MLA';
136-
} else if (archType.isGQA) {
136+
} else if (modelArch.isGQA) {
137137
archKey = 'GQA';
138138
} else {
139-
const kvHeads = archType.kv_heads || archType.num_key_value_heads;
140-
const attnHeads = archType.num_attention_heads;
139+
const kvHeads = modelArch.kv_heads || modelArch.num_key_value_heads;
140+
const attnHeads = modelArch.num_attention_heads;
141141
if (kvHeads === attnHeads) {
142142
archKey = 'MHA';
143143
} else if (kvHeads === 1) {
@@ -206,11 +206,11 @@ function updateFormulaReference(config) {
206206
return;
207207
}
208208

209-
const archType = detectArchitectureType(config);
210-
archType.kv_heads = config.num_key_value_heads;
211-
archType.num_attention_heads = config.num_attention_heads;
209+
const modelArch = detectArchitectureType(config);
210+
modelArch.kv_heads = config.num_key_value_heads;
211+
modelArch.num_attention_heads = config.num_attention_heads;
212212

213-
const formulaInfo = getFormulaInfo(archType);
213+
const formulaInfo = getFormulaInfo(modelArch);
214214
container.innerHTML = generateFormulaCard(formulaInfo);
215215
}
216216

@@ -555,8 +555,8 @@ async function calculateKVCache() {
555555
}
556556

557557
// Check if it's a hybrid model from Custom Model input
558-
const archType = detectArchitectureType(config);
559-
if (archType.isHybridModel && currentModelSource === 'custom') {
558+
const modelArch = detectArchitectureType(config);
559+
if (modelArch.isHybridModel && currentModelSource === 'custom') {
560560
showToast('warning', 'Hybrid Model Warning',
561561
'This appears to be a Hybrid model (e.g., DeepSeek V4, Qwen Hybrid). The calculation result may not be accurate. For Hybrid models, please use the Hybrid Models tab.');
562562
}
@@ -586,24 +586,31 @@ function performCalculation(config, tokens, dtype, modelName) {
586586
const dtypeSizes = { 'float32': 4, 'float16': 2, 'bfloat16': 2, 'int8': 1 };
587587
const dtypeSize = dtypeSizes[dtype] || 2;
588588

589-
const archType = detectArchitectureType(config);
589+
const modelArch = detectArchitectureType(config);
590590
const kvHeads = num_key_value_heads || num_attention_heads;
591591
const hdim = head_dim || (hidden_size / num_attention_heads);
592592

593593
let totalElements;
594594
let formula;
595595

596-
if (archType.isDSA) {
596+
if (modelArch.isDSA) {
597597
// DSA: MLA + Lightning Indexer
598598
const elementsPerToken = num_hidden_layers * (kv_lora_rank + qk_rope_head_dim + index_head_dim) / tp;
599599
totalElements = elementsPerToken * tokens * batchSize;
600600
formula = num_hidden_layers + ' × ' + tokens + ' × ' + batchSize + ' × (' + kv_lora_rank + ' + ' + qk_rope_head_dim + ' + ' + index_head_dim + ') ÷ ' + tp + ' × ' + dtypeSize + ' bytes';
601-
} else if (archType.isMLA) {
601+
} else if (modelArch.isMLA) {
602602
// MLA: no factor 2
603603
const elementsPerToken = num_hidden_layers * (kv_lora_rank + qk_rope_head_dim) / tp;
604604
totalElements = elementsPerToken * tokens * batchSize;
605605
formula = num_hidden_layers + ' × ' + tokens + ' × ' + batchSize + ' × (' + kv_lora_rank + ' + ' + qk_rope_head_dim + ') ÷ ' + tp + ' × ' + dtypeSize + ' bytes';
606-
} else if (archType.isGQA) {
606+
} else if (modelArch.isHybridModel) {
607+
// Hybrid Model: use GQA-like calculation but show warning
608+
// For hybrid models, use the available head_dim, or fall back to hidden_size / num_attention_heads
609+
const effectiveHdim = hdim || (hidden_size / num_attention_heads);
610+
const elementsPerToken = 2 * num_hidden_layers * kvHeads * effectiveHdim / tp;
611+
totalElements = elementsPerToken * tokens * batchSize;
612+
formula = '2 × ' + num_hidden_layers + ' × ' + tokens + ' × ' + batchSize + ' × ' + kvHeads + ' × ' + effectiveHdim + ' ÷ ' + tp + ' × ' + dtypeSize + ' bytes (Hybrid - may not be accurate)';
613+
} else if (modelArch.isGQA) {
607614
// GQA with explicit head_dim
608615
const elementsPerToken = 2 * num_hidden_layers * kvHeads * hdim / tp;
609616
totalElements = elementsPerToken * tokens * batchSize;
@@ -626,11 +633,11 @@ function performCalculation(config, tokens, dtype, modelName) {
626633

627634
// Determine architecture type for display
628635
let architectureType;
629-
if (archType.isDSA) {
636+
if (modelArch.isDSA) {
630637
architectureType = 'DSA (DeepSeek Sparse Attention)';
631-
} else if (archType.isMLA) {
638+
} else if (modelArch.isMLA) {
632639
architectureType = 'MLA (Multi-head Latent Attention)';
633-
} else if (archType.isHybridModel) {
640+
} else if (modelArch.isHybridModel) {
634641
architectureType = 'Hybrid Model (Warning: result may not be accurate)';
635642
} else if (kvHeads === num_attention_heads) {
636643
architectureType = 'MHA (Multi-Head Attention)';
@@ -658,7 +665,7 @@ function performCalculation(config, tokens, dtype, modelName) {
658665
config,
659666
formula,
660667
architectureType,
661-
showHybridWarning: archType.isHybridModel
668+
showHybridWarning: modelArch.isHybridModel
662669
};
663670
}
664671

@@ -718,20 +725,20 @@ function calculateMaxTokensForMemory(config, gpuMemoryGiB, dtype, modelName) {
718725
const dtypeSizes = { 'float32': 4, 'float16': 2, 'bfloat16': 2, 'int8': 1 };
719726
const dtypeSize = dtypeSizes[dtype] || 2;
720727

721-
const archType = detectArchitectureType(config);
728+
const modelArch = detectArchitectureType(config);
722729
const kvHeads = num_key_value_heads || num_attention_heads;
723730
const hdim = head_dim || (hidden_size / num_attention_heads);
724731

725732
let elementsPerToken;
726733
let formula;
727734

728-
if (archType.isDSA) {
735+
if (modelArch.isDSA) {
729736
elementsPerToken = num_hidden_layers * (kv_lora_rank + qk_rope_head_dim + index_head_dim) / tp;
730737
formula = num_hidden_layers + ' × (' + kv_lora_rank + ' + ' + qk_rope_head_dim + ' + ' + index_head_dim + ') ÷ ' + tp + ' × ' + dtypeSize + ' bytes';
731-
} else if (archType.isMLA) {
738+
} else if (modelArch.isMLA) {
732739
elementsPerToken = num_hidden_layers * (kv_lora_rank + qk_rope_head_dim) / tp;
733740
formula = num_hidden_layers + ' × (' + kv_lora_rank + ' + ' + qk_rope_head_dim + ') ÷ ' + tp + ' × ' + dtypeSize + ' bytes';
734-
} else if (archType.isGQA) {
741+
} else if (modelArch.isGQA) {
735742
elementsPerToken = 2 * num_hidden_layers * kvHeads * hdim / tp;
736743
formula = '2 × ' + num_hidden_layers + ' × ' + kvHeads + ' × ' + hdim + ' ÷ ' + tp + ' × ' + dtypeSize + ' bytes';
737744
} else {
@@ -743,8 +750,9 @@ function calculateMaxTokensForMemory(config, gpuMemoryGiB, dtype, modelName) {
743750
const maxTokens = Math.floor(totalMemoryBytes / (elementsPerToken * dtypeSize));
744751

745752
let architectureType;
746-
if (archType.isDSA) architectureType = 'DSA';
747-
else if (archType.isMLA) architectureType = 'MLA';
753+
if (modelArch.isDSA) architectureType = 'DSA';
754+
else if (modelArch.isMLA) architectureType = 'MLA';
755+
else if (modelArch.isHybridModel) architectureType = 'Hybrid Model';
748756
else if (kvHeads === num_attention_heads) architectureType = 'MHA';
749757
else if (kvHeads === 1) architectureType = 'MQA';
750758
else architectureType = 'GQA';
@@ -760,6 +768,7 @@ function calculateMaxTokensForMemory(config, gpuMemoryGiB, dtype, modelName) {
760768
elementsPerToken,
761769
formula,
762770
architectureType,
771+
isHybridModel: modelArch.isHybridModel,
763772
perTokenMemoryMiB: (elementsPerToken * dtypeSize) / Math.pow(1024, 2),
764773
config
765774
};
@@ -1234,6 +1243,7 @@ async function fetchModelConfigFromUrl(url) {
12341243

12351244
const sourceConfig = configData.text_config || configData;
12361245

1246+
// Copy an explicit list of fields, including hybrid model indicators (fields not listed here are dropped)
12371247
const transformedConfig = {
12381248
hidden_size: sourceConfig.hidden_size,
12391249
num_attention_heads: sourceConfig.num_attention_heads,
@@ -1243,7 +1253,28 @@ async function fetchModelConfigFromUrl(url) {
12431253
qk_rope_head_dim: sourceConfig.qk_rope_head_dim,
12441254
head_dim: sourceConfig.head_dim,
12451255
index_head_dim: sourceConfig.index_head_dim,
1246-
compress_ratios: sourceConfig.compress_ratios,
1256+
compress_ratios: sourceConfig.compress_ratios || configData.compress_ratios,
1257+
// Hybrid model indicators
1258+
hybrid_layer_pattern: sourceConfig.hybrid_layer_pattern || configData.hybrid_layer_pattern,
1259+
sliding_window: sourceConfig.sliding_window || configData.sliding_window,
1260+
sliding_window_size: sourceConfig.sliding_window_size || configData.sliding_window_size,
1261+
swa_num_key_value_heads: sourceConfig.swa_num_key_value_heads || configData.swa_num_key_value_heads,
1262+
swa_num_attention_heads: sourceConfig.swa_num_attention_heads || configData.swa_num_attention_heads,
1263+
swa_head_dim: sourceConfig.swa_head_dim || configData.swa_head_dim,
1264+
add_swa_attention_sink_bias: sourceConfig.add_swa_attention_sink_bias || configData.add_swa_attention_sink_bias,
1265+
layer_types: sourceConfig.layer_types,
1266+
linear_attention: sourceConfig.linear_attention,
1267+
linear_num_key_heads: sourceConfig.linear_num_key_heads,
1268+
linear_key_head_dim: sourceConfig.linear_key_head_dim,
1269+
global_head_dim: sourceConfig.global_head_dim,
1270+
num_global_key_value_heads: sourceConfig.num_global_key_value_heads,
1271+
window_attention: sourceConfig.window_attention || configData.window_attention,
1272+
attention_window: sourceConfig.attention_window || configData.attention_window,
1273+
mixed_attention: sourceConfig.mixed_attention || configData.mixed_attention,
1274+
sparse_attention: sourceConfig.sparse_attention || configData.sparse_attention,
1275+
full_attention_layers: sourceConfig.full_attention_layers || configData.full_attention_layers,
1276+
sliding_attention_layers: sourceConfig.sliding_attention_layers || configData.sliding_attention_layers,
1277+
linear_attention_layers: sourceConfig.linear_attention_layers || configData.linear_attention_layers,
12471278
_modelName: modelIdentifier
12481279
};
12491280

@@ -1379,20 +1410,38 @@ function displayMaxTokensResults(result) {
13791410
const config = result.config;
13801411
const kvHeads = config.num_key_value_heads || config.num_attention_heads;
13811412

1413+
// Show toast warning for hybrid models (same as KV Cache calculation)
1414+
if (result.isHybridModel) {
1415+
showToast('warning', 'Hybrid Model Warning',
1416+
'This appears to be a Hybrid model. The max tokens calculation may not be accurate. For Hybrid models, please use the Hybrid Models tab.');
1417+
}
1418+
13821419
resultsContainer.innerHTML = `
13831420
<div class="result-display" style="text-align: center; margin-bottom: 1rem;">
13841421
<div class="result-value" style="font-size: 1.8rem; font-weight: 700; color: var(--accent-success);">${result.maxTokens.toLocaleString()}</div>
13851422
<div class="result-label" style="font-size: 0.8rem; color: var(--text-secondary);">Max Tokens ${result.tp > 1 ? '(TP=' + result.tp + ')' : ''}</div>
13861423
</div>
13871424
1425+
${result.isHybridModel ? `
1426+
<div style="background: rgba(245, 158, 11, 0.1); border: 1px solid var(--accent-warning); border-radius: 8px; padding: 0.75rem; margin-bottom: 1rem;">
1427+
<div style="display: flex; align-items: center; gap: 0.5rem; margin-bottom: 0.25rem;">
1428+
<span style="font-size: 1rem;">⚠️</span>
1429+
<strong style="color: var(--accent-warning); font-size: 0.85rem;">Hybrid Model Warning</strong>
1430+
</div>
1431+
<div style="font-size: 0.75rem; color: var(--text-secondary); line-height: 1.4;">
1432+
This appears to be a Hybrid model. The max tokens calculation may not be accurate. Please use the Hybrid Models tab for accurate results.
1433+
</div>
1434+
</div>
1435+
` : ''}
1436+
13881437
<div class="metrics-row" style="display: flex; flex-wrap: wrap; gap: 0.75rem; margin-bottom: 1rem;">
13891438
<div class="metric-item">
13901439
<span style="color: var(--text-secondary);">Model:</span>
13911440
<strong style="color: var(--text-primary); margin-left: 0.25rem;">${getModelDisplayName(result.modelName)}</strong>
13921441
</div>
13931442
<div class="metric-item">
13941443
<span style="color: var(--text-secondary);">Type:</span>
1395-
<strong style="color: var(--text-primary); margin-left: 0.25rem;">${result.architectureType}</strong>
1444+
<strong style="color: var(--text-primary); margin-left: 0.25rem;">${result.isHybridModel ? 'Hybrid Model (Warning: result may not be accurate)' : result.architectureType}</strong>
13961445
</div>
13971446
<div class="metric-item">
13981447
<span style="color: var(--text-secondary);">GPU Memory:</span>

0 commit comments

Comments
 (0)