diff --git a/AGENTS.md b/AGENTS.md index caaf3dca..27b4b267 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -138,6 +138,31 @@ Plus the wrapper `interpolateMetricAtInteractivity` in `packages/app/src/compone The Python file has a header comment explaining the pipeline and a `_cli()` entrypoint for stdin/stdout JSON usage. When you update it, keep the structure 1:1 with the TS so future readers can diff the two files line by line. Run the helper against a known dataset and confirm the outputs match what the chart renders before merging. +## Model Parameter Counts (verified) + +Authoritative total / active parameter counts for every model in the dashboard. Use these when updating `MODEL_CONFIG` labels in `packages/app/src/lib/data-mappings.ts` or any blog/docs prose. Verify against the HF model card before adding a new model — a version bump may keep the same size (K2 → K2.5 stayed at 1T) or change it substantially (GLM-4.5 → GLM-5 went from 355B to 744B), and the name alone does not tell you which. + +| Model | Total | Active | HF ID | Source | +| ---------------------- | ----- | ----------- | ----------------------------------- | ---------------------------------- | +| DeepSeek-R1-0528 | 671B | 37B | `deepseek-ai/DeepSeek-R1-0528` | HF model card | +| DeepSeek-V4-Pro | 1.6T | 49B | `deepseek-ai/DeepSeek-V4-Pro` | HF model card | +| Kimi-K2.5 | 1T | 32B | `moonshotai/Kimi-K2.5` | HF model card | +| Kimi-K2.6 | 1T | 32B | `moonshotai/Kimi-K2.6` | HF model card | +| Qwen3.5-397B-A17B | 397B | 17B | `Qwen/Qwen3.5-397B-A17B` | HF model card | +| GLM-5 | 744B | 40B | `zai-org/GLM-5` | HF model card | +| GLM-5.1 | 744B | 40B | `zai-org/GLM-5.1-FP8` | HF model card (same base as GLM-5) | +| MiniMax-M2.5 | 230B | 10B | `MiniMaxAI/MiniMax-M2.5` | HF model card | +| MiniMax-M2.7 | 230B | 10B | `MiniMaxAI/MiniMax-M2.7` | NVIDIA M2.7 blog | +| gpt-oss-120b | 120B | 5.1B | `openai/gpt-oss-120b` | HF model card | +| Llama-3.3-70B-Instruct | 70B | 70B (dense) | `meta-llama/Llama-3.3-70B-Instruct` | HF model card | + +**Common mislabel traps** (have all bitten this 
repo at least once — do not repeat): + +- **GLM-5 ≠ 355B.** 355B is GLM-4.5. GLM-5 jumped to 744B / 40B active (256-expert MoE with DSA). +- **MiniMax-M2.5/M2.7 ≠ 456B.** 456B is the older MiniMax-Text-01 / M1 (32 large experts). The M2 series is a different architecture: 230B / 10B active, 256 small experts. +- **DeepSeek-R1 is 671B, not 685B.** HF metadata shows 685B because the bundled MTP head adds ~14B; the core MoE is 671B / 37B active. +- **Kimi K2.5 and K2.6 are post-training refinements**, not new pre-trained sizes. Same 1T / 32B / 384-expert backbone as the original K2. + ## Common Development Tasks ### Modify chart appearance/behavior diff --git a/packages/app/cypress/e2e/model-architecture.cy.ts b/packages/app/cypress/e2e/model-architecture.cy.ts index f0646c4e..dd8d3722 100644 --- a/packages/app/cypress/e2e/model-architecture.cy.ts +++ b/packages/app/cypress/e2e/model-architecture.cy.ts @@ -150,7 +150,7 @@ describe('Model Architecture Diagram', () => { doc.body.style.removeProperty('pointer-events'); }); cy.get('[role="combobox"]').filter(':visible').first().click(); - cy.get('[role="option"]').contains('Kimi K2.5').click(); + cy.get('[role="option"]').contains('Kimi K2.5/2.6').click(); cy.get('[data-testid="model-architecture-toggle"]').should('be.visible'); cy.get('body').then(($body) => { @@ -198,7 +198,7 @@ describe('Model Architecture Diagram', () => { doc.body.style.removeProperty('pointer-events'); }); cy.get('[role="combobox"]').filter(':visible').first().click(); - cy.get('[role="option"]').contains('MiniMax M2.5').click(); + cy.get('[role="option"]').contains('MiniMax M2.5/2.7').click(); cy.get('[data-testid="model-architecture-toggle"]').should('be.visible'); cy.get('body').then(($body) => { diff --git a/packages/app/src/components/about/faq-data.ts b/packages/app/src/components/about/faq-data.ts index c273756c..84ec6d29 100644 --- a/packages/app/src/components/about/faq-data.ts +++ b/packages/app/src/components/about/faq-data.ts @@ -31,7 
+31,15 @@ const gpusByVendor = [...GPU_KEYS].reduce>((acc, key) = (acc[vendor] ??= []).push(key.toUpperCase()); return acc; }, {}); -const modelNames = Object.values(DB_MODEL_TO_DISPLAY); +// /about lists each DB bucket as its own bullet, so point releases that +// elsewhere collapse under one display name (see DB_MODEL_TO_DISPLAY) are +// expanded back out here. +const modelNames = Object.values({ + ...DB_MODEL_TO_DISPLAY, + 'kimik2.6': 'Kimi-K2.6', + 'minimaxm2.7': 'MiniMax-M2.7', + 'glm5.1': 'GLM-5.1', +}); const frameworkNames = [...new Set(Object.values(FRAMEWORK_LABELS))].map((n) => n.replace(/[¹²³⁴⁵⁶⁷⁸⁹⁰]+$/u, ''), diff --git a/packages/app/src/lib/data-mappings.test.ts b/packages/app/src/lib/data-mappings.test.ts index d96d28a6..dd5b0c95 100644 --- a/packages/app/src/lib/data-mappings.test.ts +++ b/packages/app/src/lib/data-mappings.test.ts @@ -184,12 +184,13 @@ describe('getModelLabel', () => { it('returns correct label for each known model', () => { expect(getModelLabel(Model.Llama3_3_70B)).toBe('Llama 3.3 70B Instruct'); expect(getModelLabel(Model.Llama3_1_70B)).toBe('Llama 3.1 70B Instruct'); - expect(getModelLabel(Model.DeepSeek_R1)).toBe('DeepSeek R1 0528'); + expect(getModelLabel(Model.DeepSeek_R1)).toBe('DeepSeek R1 0528 671B'); + expect(getModelLabel(Model.DeepSeek_V4_Pro)).toBe('DeepSeek V4 Pro 1.6T'); expect(getModelLabel(Model.GptOss)).toBe('gpt-oss 120B'); - expect(getModelLabel(Model.Qwen3_5)).toBe('Qwen3.5'); - expect(getModelLabel(Model.Kimi_K2_5)).toBe('Kimi K2.5'); - expect(getModelLabel(Model.GLM_5)).toBe('GLM5/5.1'); - expect(getModelLabel(Model.MiniMax_M2_5)).toBe('MiniMax M2.5'); + expect(getModelLabel(Model.Qwen3_5)).toBe('Qwen3.5 397B'); + expect(getModelLabel(Model.Kimi_K2_5)).toBe('Kimi K2.5/2.6 1T'); + expect(getModelLabel(Model.GLM_5)).toBe('GLM5/5.1 744B'); + expect(getModelLabel(Model.MiniMax_M2_5)).toBe('MiniMax M2.5/2.7 230B'); }); it('falls back to the model value for unknown model', () => { diff --git 
a/packages/app/src/lib/data-mappings.ts b/packages/app/src/lib/data-mappings.ts index 6a543925..62d16ce1 100644 --- a/packages/app/src/lib/data-mappings.ts +++ b/packages/app/src/lib/data-mappings.ts @@ -48,23 +48,32 @@ interface ModelConfig { mtpEngineExclusion?: boolean; } +// Total parameter counts appended to each label so users can compare model +// scale at a glance in the dropdown. For Llama and gpt-oss the count is +// already part of the canonical name (Llama 3.3 70B, gpt-oss 120B) so no +// duplication needed. const MODEL_CONFIG: Record = { - [Model.DeepSeek_R1]: { label: 'DeepSeek R1 0528', prefix: 'dsr1', category: 'default' }, + [Model.DeepSeek_R1]: { label: 'DeepSeek R1 0528 671B', prefix: 'dsr1', category: 'default' }, [Model.DeepSeek_V4_Pro]: { - label: 'DeepSeek V4 Pro', + label: 'DeepSeek V4 Pro 1.6T', prefix: 'dsv4', category: 'default', mtpEngineExclusion: true, }, [Model.Kimi_K2_5]: { - label: 'Kimi K2.5', + // K2.5 and K2.6 share an architecture, so the dropdown surfaces both + // versions joined with a slash — matches the GLM5/5.1 pattern. The + // hyphenated `Model.Kimi_K2_5` enum value stays as-is for internal + // routing / DB key mapping. + label: 'Kimi K2.5/2.6 1T', prefix: 'kimik2.5', category: 'default', }, - [Model.Qwen3_5]: { label: 'Qwen3.5', prefix: 'qwen3.5', category: 'default' }, - [Model.GLM_5]: { label: 'GLM5/5.1', prefix: 'glm5', category: 'default' }, + [Model.Qwen3_5]: { label: 'Qwen3.5 397B', prefix: 'qwen3.5', category: 'default' }, + [Model.GLM_5]: { label: 'GLM5/5.1 744B', prefix: 'glm5', category: 'default' }, [Model.MiniMax_M2_5]: { - label: 'MiniMax M2.5', + // M2.5 and M2.7 share an architecture — same GLM5/5.1 pattern as Kimi. + label: 'MiniMax M2.5/2.7 230B', prefix: 'minimaxm2.5', category: 'default', },