@@ -126,18 +126,18 @@ const formulaData = {
126126// Formula Display Functions
127127// ============================================================
128128
129- function getFormulaInfo ( archType ) {
129+ function getFormulaInfo ( modelArch ) {
130130 let archKey = 'Standard' ;
131131
132- if ( archType . isDSA ) {
132+ if ( modelArch . isDSA ) {
133133 archKey = 'DSA' ;
134- } else if ( archType . isMLA ) {
134+ } else if ( modelArch . isMLA ) {
135135 archKey = 'MLA' ;
136- } else if ( archType . isGQA ) {
136+ } else if ( modelArch . isGQA ) {
137137 archKey = 'GQA' ;
138138 } else {
139- const kvHeads = archType . kv_heads || archType . num_key_value_heads ;
140- const attnHeads = archType . num_attention_heads ;
139+ const kvHeads = modelArch . kv_heads || modelArch . num_key_value_heads ;
140+ const attnHeads = modelArch . num_attention_heads ;
141141 if ( kvHeads === attnHeads ) {
142142 archKey = 'MHA' ;
143143 } else if ( kvHeads === 1 ) {
@@ -206,11 +206,11 @@ function updateFormulaReference(config) {
206206 return ;
207207 }
208208
209- const archType = detectArchitectureType ( config ) ;
210- archType . kv_heads = config . num_key_value_heads ;
211- archType . num_attention_heads = config . num_attention_heads ;
209+ const modelArch = detectArchitectureType ( config ) ;
210+ modelArch . kv_heads = config . num_key_value_heads ;
211+ modelArch . num_attention_heads = config . num_attention_heads ;
212212
213- const formulaInfo = getFormulaInfo ( archType ) ;
213+ const formulaInfo = getFormulaInfo ( modelArch ) ;
214214 container . innerHTML = generateFormulaCard ( formulaInfo ) ;
215215}
216216
@@ -555,8 +555,8 @@ async function calculateKVCache() {
555555 }
556556
557557 // Check if it's a hybrid model from Custom Model input
558- const archType = detectArchitectureType ( config ) ;
559- if ( archType . isHybridModel && currentModelSource === 'custom' ) {
558+ const modelArch = detectArchitectureType ( config ) ;
559+ if ( modelArch . isHybridModel && currentModelSource === 'custom' ) {
560560 showToast ( 'warning' , 'Hybrid Model Warning' ,
561561 'This appears to be a Hybrid model (e.g., DeepSeek V4, Qwen Hybrid). The calculation result may not be accurate. For Hybrid models, please use the Hybrid Models tab.' ) ;
562562 }
@@ -586,24 +586,31 @@ function performCalculation(config, tokens, dtype, modelName) {
586586 const dtypeSizes = { 'float32' : 4 , 'float16' : 2 , 'bfloat16' : 2 , 'int8' : 1 } ;
587587 const dtypeSize = dtypeSizes [ dtype ] || 2 ;
588588
589- const archType = detectArchitectureType ( config ) ;
589+ const modelArch = detectArchitectureType ( config ) ;
590590 const kvHeads = num_key_value_heads || num_attention_heads ;
591591 const hdim = head_dim || ( hidden_size / num_attention_heads ) ;
592592
593593 let totalElements ;
594594 let formula ;
595595
596- if ( archType . isDSA ) {
596+ if ( modelArch . isDSA ) {
597597 // DSA: MLA + Lightning Indexer
598598 const elementsPerToken = num_hidden_layers * ( kv_lora_rank + qk_rope_head_dim + index_head_dim ) / tp ;
599599 totalElements = elementsPerToken * tokens * batchSize ;
600600 formula = num_hidden_layers + ' × ' + tokens + ' × ' + batchSize + ' × (' + kv_lora_rank + ' + ' + qk_rope_head_dim + ' + ' + index_head_dim + ') ÷ ' + tp + ' × ' + dtypeSize + ' bytes' ;
601- } else if ( archType . isMLA ) {
601+ } else if ( modelArch . isMLA ) {
602602 // MLA: no factor 2
603603 const elementsPerToken = num_hidden_layers * ( kv_lora_rank + qk_rope_head_dim ) / tp ;
604604 totalElements = elementsPerToken * tokens * batchSize ;
605605 formula = num_hidden_layers + ' × ' + tokens + ' × ' + batchSize + ' × (' + kv_lora_rank + ' + ' + qk_rope_head_dim + ') ÷ ' + tp + ' × ' + dtypeSize + ' bytes' ;
606- } else if ( archType . isGQA ) {
606+ } else if ( modelArch . isHybridModel ) {
607+ // Hybrid Model: approximate with the GQA formula; the formula string is tagged as possibly inaccurate
608+ // hdim already falls back to hidden_size / num_attention_heads when head_dim is not set
609+ const effectiveHdim = hdim || ( hidden_size / num_attention_heads ) ;
610+ const elementsPerToken = 2 * num_hidden_layers * kvHeads * effectiveHdim / tp ;
611+ totalElements = elementsPerToken * tokens * batchSize ;
612+ formula = '2 × ' + num_hidden_layers + ' × ' + tokens + ' × ' + batchSize + ' × ' + kvHeads + ' × ' + effectiveHdim + ' ÷ ' + tp + ' × ' + dtypeSize + ' bytes (Hybrid - may not be accurate)' ;
613+ } else if ( modelArch . isGQA ) {
607614 // GQA with explicit head_dim
608615 const elementsPerToken = 2 * num_hidden_layers * kvHeads * hdim / tp ;
609616 totalElements = elementsPerToken * tokens * batchSize ;
@@ -626,11 +633,11 @@ function performCalculation(config, tokens, dtype, modelName) {
626633
627634 // Determine architecture type for display
628635 let architectureType ;
629- if ( archType . isDSA ) {
636+ if ( modelArch . isDSA ) {
630637 architectureType = 'DSA (DeepSeek Sparse Attention)' ;
631- } else if ( archType . isMLA ) {
638+ } else if ( modelArch . isMLA ) {
632639 architectureType = 'MLA (Multi-head Latent Attention)' ;
633- } else if ( archType . isHybridModel ) {
640+ } else if ( modelArch . isHybridModel ) {
634641 architectureType = 'Hybrid Model (Warning: result may not be accurate)' ;
635642 } else if ( kvHeads === num_attention_heads ) {
636643 architectureType = 'MHA (Multi-Head Attention)' ;
@@ -658,7 +665,7 @@ function performCalculation(config, tokens, dtype, modelName) {
658665 config,
659666 formula,
660667 architectureType,
661- showHybridWarning : archType . isHybridModel
668+ showHybridWarning : modelArch . isHybridModel
662669 } ;
663670}
664671
@@ -718,20 +725,20 @@ function calculateMaxTokensForMemory(config, gpuMemoryGiB, dtype, modelName) {
718725 const dtypeSizes = { 'float32' : 4 , 'float16' : 2 , 'bfloat16' : 2 , 'int8' : 1 } ;
719726 const dtypeSize = dtypeSizes [ dtype ] || 2 ;
720727
721- const archType = detectArchitectureType ( config ) ;
728+ const modelArch = detectArchitectureType ( config ) ;
722729 const kvHeads = num_key_value_heads || num_attention_heads ;
723730 const hdim = head_dim || ( hidden_size / num_attention_heads ) ;
724731
725732 let elementsPerToken ;
726733 let formula ;
727734
728- if ( archType . isDSA ) {
735+ if ( modelArch . isDSA ) {
729736 elementsPerToken = num_hidden_layers * ( kv_lora_rank + qk_rope_head_dim + index_head_dim ) / tp ;
730737 formula = num_hidden_layers + ' × (' + kv_lora_rank + ' + ' + qk_rope_head_dim + ' + ' + index_head_dim + ') ÷ ' + tp + ' × ' + dtypeSize + ' bytes' ;
731- } else if ( archType . isMLA ) {
738+ } else if ( modelArch . isMLA ) {
732739 elementsPerToken = num_hidden_layers * ( kv_lora_rank + qk_rope_head_dim ) / tp ;
733740 formula = num_hidden_layers + ' × (' + kv_lora_rank + ' + ' + qk_rope_head_dim + ') ÷ ' + tp + ' × ' + dtypeSize + ' bytes' ;
734- } else if ( archType . isGQA ) {
741+ } else if ( modelArch . isGQA ) {
735742 elementsPerToken = 2 * num_hidden_layers * kvHeads * hdim / tp ;
736743 formula = '2 × ' + num_hidden_layers + ' × ' + kvHeads + ' × ' + hdim + ' ÷ ' + tp + ' × ' + dtypeSize + ' bytes' ;
737744 } else {
@@ -743,8 +750,9 @@ function calculateMaxTokensForMemory(config, gpuMemoryGiB, dtype, modelName) {
743750 const maxTokens = Math . floor ( totalMemoryBytes / ( elementsPerToken * dtypeSize ) ) ;
744751
745752 let architectureType ;
746- if ( archType . isDSA ) architectureType = 'DSA' ;
747- else if ( archType . isMLA ) architectureType = 'MLA' ;
753+ if ( modelArch . isDSA ) architectureType = 'DSA' ;
754+ else if ( modelArch . isMLA ) architectureType = 'MLA' ;
755+ else if ( modelArch . isHybridModel ) architectureType = 'Hybrid Model' ;
748756 else if ( kvHeads === num_attention_heads ) architectureType = 'MHA' ;
749757 else if ( kvHeads === 1 ) architectureType = 'MQA' ;
750758 else architectureType = 'GQA' ;
@@ -760,6 +768,7 @@ function calculateMaxTokensForMemory(config, gpuMemoryGiB, dtype, modelName) {
760768 elementsPerToken,
761769 formula,
762770 architectureType,
771+ isHybridModel : modelArch . isHybridModel ,
763772 perTokenMemoryMiB : ( elementsPerToken * dtypeSize ) / Math . pow ( 1024 , 2 ) ,
764773 config
765774 } ;
@@ -1234,6 +1243,7 @@ async function fetchModelConfigFromUrl(url) {
12341243
12351244 const sourceConfig = configData . text_config || configData ;
12361245
1246+ // Copy selected config fields, including hybrid model indicators
12371247 const transformedConfig = {
12381248 hidden_size : sourceConfig . hidden_size ,
12391249 num_attention_heads : sourceConfig . num_attention_heads ,
@@ -1243,7 +1253,28 @@ async function fetchModelConfigFromUrl(url) {
12431253 qk_rope_head_dim : sourceConfig . qk_rope_head_dim ,
12441254 head_dim : sourceConfig . head_dim ,
12451255 index_head_dim : sourceConfig . index_head_dim ,
1246- compress_ratios : sourceConfig . compress_ratios ,
1256+ compress_ratios : sourceConfig . compress_ratios || configData . compress_ratios ,
1257+ // Hybrid model indicators
1258+ hybrid_layer_pattern : sourceConfig . hybrid_layer_pattern || configData . hybrid_layer_pattern ,
1259+ sliding_window : sourceConfig . sliding_window || configData . sliding_window ,
1260+ sliding_window_size : sourceConfig . sliding_window_size || configData . sliding_window_size ,
1261+ swa_num_key_value_heads : sourceConfig . swa_num_key_value_heads || configData . swa_num_key_value_heads ,
1262+ swa_num_attention_heads : sourceConfig . swa_num_attention_heads || configData . swa_num_attention_heads ,
1263+ swa_head_dim : sourceConfig . swa_head_dim || configData . swa_head_dim ,
1264+ add_swa_attention_sink_bias : sourceConfig . add_swa_attention_sink_bias || configData . add_swa_attention_sink_bias ,
1265+ layer_types : sourceConfig . layer_types ,
1266+ linear_attention : sourceConfig . linear_attention ,
1267+ linear_num_key_heads : sourceConfig . linear_num_key_heads ,
1268+ linear_key_head_dim : sourceConfig . linear_key_head_dim ,
1269+ global_head_dim : sourceConfig . global_head_dim ,
1270+ num_global_key_value_heads : sourceConfig . num_global_key_value_heads ,
1271+ window_attention : sourceConfig . window_attention || configData . window_attention ,
1272+ attention_window : sourceConfig . attention_window || configData . attention_window ,
1273+ mixed_attention : sourceConfig . mixed_attention || configData . mixed_attention ,
1274+ sparse_attention : sourceConfig . sparse_attention || configData . sparse_attention ,
1275+ full_attention_layers : sourceConfig . full_attention_layers || configData . full_attention_layers ,
1276+ sliding_attention_layers : sourceConfig . sliding_attention_layers || configData . sliding_attention_layers ,
1277+ linear_attention_layers : sourceConfig . linear_attention_layers || configData . linear_attention_layers ,
12471278 _modelName : modelIdentifier
12481279 } ;
12491280
@@ -1379,20 +1410,38 @@ function displayMaxTokensResults(result) {
13791410 const config = result . config ;
13801411 const kvHeads = config . num_key_value_heads || config . num_attention_heads ;
13811412
1413+ // Show a toast warning for hybrid models (calculateKVCache raises a similar warning for custom-model input)
1414+ if ( result . isHybridModel ) {
1415+ showToast ( 'warning' , 'Hybrid Model Warning' ,
1416+ 'This appears to be a Hybrid model. The max tokens calculation may not be accurate. For Hybrid models, please use the Hybrid Models tab.' ) ;
1417+ }
1418+
13821419 resultsContainer . innerHTML = `
13831420 <div class="result-display" style="text-align: center; margin-bottom: 1rem;">
13841421 <div class="result-value" style="font-size: 1.8rem; font-weight: 700; color: var(--accent-success);">${ result . maxTokens . toLocaleString ( ) } </div>
13851422 <div class="result-label" style="font-size: 0.8rem; color: var(--text-secondary);">Max Tokens ${ result . tp > 1 ? '(TP=' + result . tp + ')' : '' } </div>
13861423 </div>
13871424
1425+ ${ result . isHybridModel ? `
1426+ <div style="background: rgba(245, 158, 11, 0.1); border: 1px solid var(--accent-warning); border-radius: 8px; padding: 0.75rem; margin-bottom: 1rem;">
1427+ <div style="display: flex; align-items: center; gap: 0.5rem; margin-bottom: 0.25rem;">
1428+ <span style="font-size: 1rem;">⚠️</span>
1429+ <strong style="color: var(--accent-warning); font-size: 0.85rem;">Hybrid Model Warning</strong>
1430+ </div>
1431+ <div style="font-size: 0.75rem; color: var(--text-secondary); line-height: 1.4;">
1432+ This appears to be a Hybrid model. The max tokens calculation may not be accurate. Please use the Hybrid Models tab for accurate results.
1433+ </div>
1434+ </div>
1435+ ` : '' }
1436+
13881437 <div class="metrics-row" style="display: flex; flex-wrap: wrap; gap: 0.75rem; margin-bottom: 1rem;">
13891438 <div class="metric-item">
13901439 <span style="color: var(--text-secondary);">Model:</span>
13911440 <strong style="color: var(--text-primary); margin-left: 0.25rem;">${ getModelDisplayName ( result . modelName ) } </strong>
13921441 </div>
13931442 <div class="metric-item">
13941443 <span style="color: var(--text-secondary);">Type:</span>
1395- <strong style="color: var(--text-primary); margin-left: 0.25rem;">${ result . architectureType } </strong>
1444+ <strong style="color: var(--text-primary); margin-left: 0.25rem;">${ result . isHybridModel ? 'Hybrid Model (Warning: result may not be accurate)' : result . architectureType } </strong>
13961445 </div>
13971446 <div class="metric-item">
13981447 <span style="color: var(--text-secondary);">GPU Memory:</span>
0 commit comments