Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 85 additions & 0 deletions packages/app/cypress/component/scatter-graph.cy.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -246,4 +246,89 @@ describe('ScatterGraph', () => {
.find('text')
.should('contain.text', 'feature-branch');
});

it('renders M3 mtp rooflines with the EAGLE label (official + overlay)', () => {
  // Chart definition for the interactivity chart type with an `upper_left`
  // throughput-per-GPU roofline setting.
  const interactivityChartDef = createMockChartDefinition({
    chartType: 'interactivity',
    y_tpPerGpu_roofline: 'upper_left',
  });

  // Official series: three FP4 points on the `h100_vllm_mtp` hardware key.
  const officialData = [
    { x: 8, y: 240 },
    { x: 16, y: 200 },
    { x: 32, y: 150 },
  ].map(({ x, y }) =>
    createMockInferenceData({ hwKey: 'h100_vllm_mtp', x, y, precision: Precision.FP4 }),
  );

  // Overlay series: three FP4 points on `b200_vllm_mtp`, all tagged with the
  // same run URL. No run metadata is supplied for that run (see
  // `unofficialRunInfos` below), so the overlay line label has to fall back to
  // the hardware label — which is the model-aware suffix path under test.
  const runUrl = 'https://github.com/x/y/actions/runs/999';
  const overlayPoints = [
    { x: 8, y: 320 },
    { x: 16, y: 280 },
    { x: 32, y: 220 },
  ].map(({ x, y }) =>
    createMockInferenceData({
      hwKey: 'b200_vllm_mtp',
      x,
      y,
      precision: Precision.FP4,
      run_url: runUrl,
    }),
  );
  const overlayData = {
    data: overlayPoints,
    hardwareConfig: hwConfig,
    label: '',
    runUrl,
  };

  // Provider state: the official hardware key is active with FP4 selected and
  // line labels switched on; the overlay hardware key is active as well.
  const providerState = {
    inference: {
      hardwareConfig: hwConfig,
      activeHwTypes: new Set(['h100_vllm_mtp']),
      hwTypesWithData: new Set(['h100_vllm_mtp']),
      selectedPrecisions: [Precision.FP4],
      showLineLabels: true,
    },
    unofficial: {
      activeOverlayHwTypes: new Set(['b200_vllm_mtp']),
      allOverlayHwTypes: new Set(['b200_vllm_mtp']),
      runIndexByUrl: { [runUrl]: 0, '999': 0 },
      // Deliberately empty: with no run info the overlay label uses the hw label.
      unofficialRunInfos: [],
    },
  };

  mountWithProviders(
    <div style={{ width: 800, height: 600 }}>
      <ScatterGraph
        chartId="test-scatter-m3-eagle"
        modelLabel="MiniMax-M3"
        data={officialData}
        xLabel="Concurrency"
        yLabel="Throughput / GPU (tok/s)"
        chartDefinition={interactivityChartDef}
        overlayData={overlayData}
      />
    </div>,
    providerState,
  );

  // Every assertion below is scoped to this chart's line-label groups.
  const lineLabels = '#test-scatter-m3-eagle svg .line-label';

  // Official (non-overlay) roofline label must say "EAGLE" instead of "MTP".
  cy.get(lineLabels)
    .filter('[data-line-key]:not([data-line-key^="overlay-"])')
    .find('text')
    .should('contain.text', 'EAGLE');

  // The overlay roofline, labelled via the hw-label fallback, must say "EAGLE" too.
  cy.get(`${lineLabels}[data-line-key^="overlay-"]`)
    .find('text')
    .should('contain.text', 'EAGLE');

  // And the generic "MTP" token must not appear in any line label for M3.
  cy.get(`${lineLabels} text`).should('not.contain.text', 'MTP');
});
});
26 changes: 22 additions & 4 deletions packages/app/cypress/component/submissions-table.cy.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,36 @@ describe('SubmissionsTable — Spec Method column', () => {
cy.contains('th', 'Spec Method').should('be.visible');
});

it('renders spec_method values uppercased and shows an em-dash for "none"', () => {
it('renders canonical spec_method labels and shows an em-dash for "none"', () => {
cy.mount(<SubmissionsTable data={rows} />);
// CSS uppercases the value; the DOM text remains lowercase.
cy.contains('td', 'mtp').should('be.visible').and('have.class', 'uppercase');
cy.contains('td', 'eagle').should('be.visible').and('have.class', 'uppercase');
// The cell renders the canonical spec-method label (MTP/EAGLE) for the model.
cy.contains('td', 'MTP').should('be.visible').and('have.class', 'uppercase');
cy.contains('td', 'EAGLE').should('be.visible').and('have.class', 'uppercase');
// The "none" row renders an em-dash placeholder instead of literal "none".
// Hardware text is rendered uppercase via .toUpperCase().
cy.contains('tbody tr', 'MI355X').within(() => {
cy.contains('—').should('be.visible');
});
});

// Regression test: a row for the `minimaxm3` model whose spec_method is `mtp`
// must be displayed with the "EAGLE" label rather than the generic "MTP".
it('renders M3 mtp as EAGLE, not MTP', () => {
cy.mount(
<SubmissionsTable
data={[
{
// Start from the shared base row and override only the fields below.
...baseRow,
model: 'minimaxm3',
hardware: 'b200',
spec_method: 'mtp',
date: '2026-05-10',
},
]}
/>,
);
// Only one row is mounted, so any matching cell belongs to the M3 row.
cy.contains('td', 'EAGLE').should('be.visible');
// No table cell may contain the generic "MTP" text.
cy.contains('td', 'MTP').should('not.exist');
});

it('sorts by spec_method when the header is clicked', () => {
cy.mount(<SubmissionsTable data={rows} />);
// Desc alphabetical: 'none' (mi355x) → 'mtp' (h200) → 'eagle' (b300).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ export function useThroughputData(

const entry = rowToAggDataEntry(row);
const hwKey = getHardwareKey(entry);
const hwConfig = getHardwareConfig(hwKey);
const hwConfig = getHardwareConfig(hwKey, entry.model);
if (!hwConfig) continue;

if (!hwConfigMap[hwKey]) hwConfigMap[hwKey] = { ...hwConfig, name: hwKey };
Expand Down
16 changes: 12 additions & 4 deletions packages/app/src/components/evaluation/chart-data.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import { DISPLAY_MODEL_TO_DB } from '@semianalysisai/inferencex-constants';
import {
DISPLAY_MODEL_TO_DB,
resolveFrameworkPartLabel,
} from '@semianalysisai/inferencex-constants';

import type { EvalChangelogEntry, EvaluationChartData } from '@/components/evaluation/types';
import type { EvalRow } from '@/lib/api';
Expand Down Expand Up @@ -54,10 +57,13 @@ function buildConfigLabel(
conc: number | null,
params: EvalLabelParams,
showPrecision: boolean,
model?: string,
): string {
const headerSuffixes: string[] = [];
if (framework && framework !== '1k8k') headerSuffixes.push(getFrameworkLabel(framework));
if (specMethod && specMethod !== 'none') headerSuffixes.push(getFrameworkLabel(specMethod));
// M3's `mtp` spec token renders as "EAGLE"; every other model keeps "MTP".
if (specMethod && specMethod !== 'none')
headerSuffixes.push(resolveFrameworkPartLabel(model, specMethod));

const detailSuffixes: string[] = [];
if (precision && showPrecision) detailSuffixes.push(precision.toUpperCase());
Expand Down Expand Up @@ -128,7 +134,7 @@ export function buildEvaluationChartRows(
return null;
}

const hwConfig = getHardwareConfig(hwKey);
const hwConfig = getHardwareConfig(hwKey, selectedModel);
const hwLabel = hwConfig.label;

return {
Expand All @@ -154,6 +160,7 @@ export function buildEvaluationChartRows(
prefillNw: item.prefill_num_workers,
},
showPrecision,
selectedModel,
),
score,
scoreError: item.metrics.em_strict_se ?? item.metrics.score_se ?? 0,
Expand Down Expand Up @@ -290,7 +297,7 @@ export function buildEvalChangelogEntries(
})
.map((item) => {
const hwKey = normalizeEvalHardwareKey(item.hardware, item.framework, item.spec_method);
const hwConfig = getHardwareConfig(hwKey);
const hwConfig = getHardwareConfig(hwKey, selectedModel);
const hwLabel = hwConfig.label;
// Changelog labels historically omit TP/EP; keep that behavior while
// still surfacing the disagg marker.
Expand All @@ -308,6 +315,7 @@ export function buildEvalChangelogEntries(
decodeDpa: item.decode_dp_attention,
},
showPrecision,
selectedModel,
),
};
});
Expand Down
4 changes: 2 additions & 2 deletions packages/app/src/components/inference/InferenceContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -311,9 +311,9 @@ export function InferenceProvider({
.toSorted((a, b) => getModelSortIndex(a) - getModelSortIndex(b) || a.localeCompare(b))
.map((hw) => ({
value: hw,
label: getDisplayLabel(getHardwareConfig(hw)),
label: getDisplayLabel(getHardwareConfig(hw, selectedModel)),
}));
}, [availabilityRows, dbModelKeys, effectiveSequence, effectivePrecisions]);
}, [availabilityRows, dbModelKeys, effectiveSequence, effectivePrecisions, selectedModel]);

// --- Tracked config functions ---
const buildTrackedConfigId = useCallback((point: InferenceData): string => {
Expand Down
5 changes: 4 additions & 1 deletion packages/app/src/components/inference/ui/ChartDisplay.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,10 @@ export default function ChartDisplay() {
...visibleData,
...visibleOverlayRows,
]).map((issue) =>
knownIssueCsvNote(issue, getDisplayLabel(getHardwareConfig(issue.hwKey))),
knownIssueCsvNote(
issue,
getDisplayLabel(getHardwareConfig(issue.hwKey, graph.model)),
),
);
exportToCsv(
`InferenceX_${selectedModel}_${graph.chartDefinition.chartType}`,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import { useMemo, useState } from 'react';
import { track } from '@/lib/analytics';
import { ExternalLinkIcon } from '@/components/ui/external-link-icon';

import { DB_MODEL_TO_DISPLAY } from '@semianalysisai/inferencex-constants';

import type { ComparisonChangelog as ComparisonChangelogType } from '@/hooks/api/use-comparison-changelogs';
import {
configKeyMatchesHwKey,
Expand Down Expand Up @@ -208,14 +210,18 @@ export default function ComparisonChangelog({
track('inference_comparison_changelog_toggled', { expanded: newState });
};

// All modelDbKeys for a comparison map to one display model, so [0] suffices
// for per-model suffix overrides (e.g. M3 MTP → EAGLE).
const displayModel = DB_MODEL_TO_DISPLAY[modelDbKeys[0]] ?? modelDbKeys[0];

/** Display labels of the selected GPUs that a set of changelog entries touches. */
const gpuLabelsFor = (entries: { config_keys: string[] }[]): string => {
if (selectedGPUs.length <= 1) return '';
return selectedGPUs
.filter((gpu) =>
entries.some((e) => e.config_keys.some((k) => configKeyMatchesHwKey(k, gpu))),
)
.map((gpu) => getDisplayLabel(getHardwareConfig(gpu)))
.map((gpu) => getDisplayLabel(getHardwareConfig(gpu, displayModel)))
.join(', ');
};

Expand Down
6 changes: 3 additions & 3 deletions packages/app/src/components/inference/ui/GPUGraph.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ const CHART_MARGIN = { top: 24, right: 10, bottom: 60, left: 60 };
// lookup misses (legacy data).
function labelTextFor(pts: InferenceData[], numbering: Map<string, number>): string {
const hwKey = String(pts[0].hwKey);
const cfg = getHardwareConfig(hwKey);
const cfg = getHardwareConfig(hwKey, pts[0].model);
const hwLabel = cfg ? getDisplayLabel(cfg) : hwKey;
return `${hwLabel} • ${comparisonEntryLabel(String(pts[0].date), numbering)}`;
}
Expand Down Expand Up @@ -266,7 +266,7 @@ const GPUGraph = React.memo(
const knownIssueAnnotations = useMemo(
(): KnownIssueAnnotation[] =>
matchKnownConfigIssues(modelLabel, filteredData).map((issue) => {
const cfg = getHardwareConfig(issue.hwKey);
const cfg = getHardwareConfig(issue.hwKey, modelLabel);
const colorEntry = allGraphs.find(
(entry) => entry.hwKey === issue.hwKey && activeDates.has(entry.id),
);
Expand Down Expand Up @@ -835,7 +835,7 @@ const GPUGraph = React.memo(
hw: id,
label: comparisonEntryLabel(date, runNumbering),
color,
title: getDisplayLabel(getHardwareConfig(hwKey)),
title: getDisplayLabel(getHardwareConfig(hwKey, modelLabel)),
isActive: activeDates.has(id),
onClick: () => {
toggleActiveDate(id);
Expand Down
4 changes: 2 additions & 2 deletions packages/app/src/components/inference/ui/InferenceTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ export default function InferenceTable({
() => [
{
header: 'GPU',
cell: (row) => getDisplayLabel(getHardwareConfig(row.hwKey)),
sortValue: (row) => getDisplayLabel(getHardwareConfig(row.hwKey)),
cell: (row) => getDisplayLabel(getHardwareConfig(row.hwKey, row.model)),
sortValue: (row) => getDisplayLabel(getHardwareConfig(row.hwKey, row.model)),
className: 'font-medium whitespace-nowrap',
},
{
Expand Down
28 changes: 17 additions & 11 deletions packages/app/src/components/inference/ui/ScatterGraph.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,23 @@ const formatChangelogDescription = (desc: string | string[]): React.JSX.Element

const CHART_MARGIN = { top: 24, right: 10, bottom: 60, left: 60 };

// Derive a readable label from a hwKey using the HARDWARE_CONFIG source of truth
const parseHwKeyToLabel = (hwKey: string): { name: string; label: string } => {
const config = getHardwareConfig(hwKey);
// Derive a readable label from a hwKey using the HARDWARE_CONFIG source of truth.
// `model` (display name) enables per-model suffix overrides (e.g. M3 MTP → EAGLE).
const parseHwKeyToLabel = (hwKey: string, model?: string): { name: string; label: string } => {
const config = getHardwareConfig(hwKey, model);
return { name: config.label, label: getDisplayLabel(config) };
};

// Line-label text for a curve. When more than one precision is shown, each curve
// is its own line, so append the precision (e.g. "B200 (vLLM) FP8") to keep the
// FP4 and FP8 curves of the same hardware distinguishable.
const lineLabelText = (hwKey: string, precision: string, includePrecision: boolean): string => {
const base = parseHwKeyToLabel(hwKey).label;
const lineLabelText = (
hwKey: string,
precision: string,
includePrecision: boolean,
model?: string,
): string => {
const base = parseHwKeyToLabel(hwKey, model).label;
return includePrecision ? `${base} ${getPrecisionLabel(precision as Precision)}` : base;
};

Expand Down Expand Up @@ -366,7 +372,7 @@ const ScatterGraph = React.memo(
const visiblePoints = [...filteredData, ...visibleOverlayPoints];
return matchKnownConfigIssues(modelLabel, visiblePoints).map((issue) => ({
issue,
label: parseHwKeyToLabel(issue.hwKey).label,
label: parseHwKeyToLabel(issue.hwKey, modelLabel).label,
color: getCssColor(resolveColor(issue.hwKey)),
points: visiblePoints
.filter((p) => pointMatchesIssue(issue, p))
Expand Down Expand Up @@ -1065,7 +1071,7 @@ const ScatterGraph = React.memo(
placeLabel(
entry.key,
entry.hw,
lineLabelText(entry.hw, entry.precision, multiPrecision),
lineLabelText(entry.hw, entry.precision, multiPrecision, modelLabel),
getCssColor(resolveColor(entry.hw)),
entry.points,
);
Expand All @@ -1079,7 +1085,7 @@ const ScatterGraph = React.memo(
lineLabels.push({
key: entry.key,
hw: entry.hw,
label: lineLabelText(entry.hw, entry.precision, multiPrecision),
label: lineLabelText(entry.hw, entry.precision, multiPrecision, modelLabel),
color: getCssColor(resolveColor(entry.hw)),
x: xScale(entry.points[0].x),
y: yScale(entry.points[0].y),
Expand All @@ -1101,7 +1107,7 @@ const ScatterGraph = React.memo(
const info = unofficialRunInfos[runIndex];
const base = info
? `✕ ${info.branch || `run ${info.id}`}`
: parseHwKeyToLabel(hwKey).label;
: parseHwKeyToLabel(hwKey, modelLabel).label;
return multiPrecision
? `${base} ${getPrecisionLabel(precision as Precision)}`
: base;
Expand Down Expand Up @@ -1144,7 +1150,7 @@ const ScatterGraph = React.memo(
lineLabels.push({
key: entry.key,
hw: entry.hw,
label: lineLabelText(entry.hw, entry.precision, multiPrecision),
label: lineLabelText(entry.hw, entry.precision, multiPrecision, modelLabel),
color: getCssColor(resolveColor(entry.hw)),
x: xScale(pt.x),
y: yScale(pt.y),
Expand All @@ -1158,7 +1164,7 @@ const ScatterGraph = React.memo(
const info = unofficialRunInfos[group.runIndex];
const branchOrHw = info
? `✕ ${info.branch || `run ${info.id}`}`
: parseHwKeyToLabel(group.hwKey).label;
: parseHwKeyToLabel(group.hwKey, modelLabel).label;
const labelText = multiPrecision
? `${branchOrHw} ${getPrecisionLabel((group.points[0]?.precision ?? '') as Precision)}`
: branchOrHw;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ export function UnofficialChartDisplay() {
hardwareConfig: Object.fromEntries(
Object.entries(dataForChart.gpus || {}).map(([k, v]) => [
k,
{ ...getHardwareConfig(k), ...v },
{ ...getHardwareConfig(k, selectedModel), ...v },
]),
),
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ describe('formatConfigKeys', () => {
expect(result).toContain('FP8');
});

it('renders M3 mtp as EAGLE (not MTP)', () => {
  // A `minimaxm3` config key ending in `-mtp` should keep its hardware name
  // and show the EAGLE label in place of the generic MTP one.
  const formatted = formatConfigKeys('minimaxm3-fp8-h100-vllm-mtp');

  for (const expectedToken of ['H100', 'EAGLE']) {
    expect(formatted).toContain(expectedToken);
  }
  expect(formatted).not.toContain('MTP');
});

it('formats compound framework names', () => {
const result = formatConfigKeys('gptoss-fp4-b200-dynamo-sglang');
expect(result).toContain('B200');
Expand Down
Loading
Loading