Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 25 additions & 1 deletion packages/app/src/app/api/v1/benchmarks/route.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { describe, expect, it, vi, beforeEach } from 'vitest';

const { mockGetLatestBenchmarks, mockGetDb } = vi.hoisted(() => ({
const { mockGetLatestBenchmarks, mockGetBenchmarksForRun, mockGetDb } = vi.hoisted(() => ({
mockGetLatestBenchmarks: vi.fn(),
mockGetBenchmarksForRun: vi.fn(),
mockGetDb: vi.fn(() => 'mock-sql'),
}));

Expand All @@ -13,6 +14,7 @@ vi.mock('@semianalysisai/inferencex-db/connection', () => ({

vi.mock('@semianalysisai/inferencex-db/queries/benchmarks', () => ({
getLatestBenchmarks: mockGetLatestBenchmarks,
getBenchmarksForRun: mockGetBenchmarksForRun,
}));

vi.mock('@/lib/api-cache', () => ({
Expand Down Expand Up @@ -125,6 +127,28 @@ describe('GET /api/v1/benchmarks', () => {
);
});

// With both exactRun=true and a numeric runId, the handler must use the
// exact-run query and leave the latest-benchmarks query untouched.
it('routes exactRun=true + runId to the exact-run query', async () => {
  const expectedRows = [{ id: 1, hardware: 'mi300x' }];
  mockGetBenchmarksForRun.mockResolvedValueOnce(expectedRows);

  const url = '/api/v1/benchmarks?model=DeepSeek-R1-0528&runId=27489075807&exactRun=true';
  const response = await GET(req(url));

  expect(response.status).toBe(200);
  const body = await response.json();
  expect(body).toEqual(expectedRows);
  // The run id is forwarded as a string, after the db handle and model keys.
  expect(mockGetBenchmarksForRun).toHaveBeenCalledWith('mock-sql', ['dsr1'], '27489075807');
  expect(mockGetLatestBenchmarks).not.toHaveBeenCalled();
});

// exactRun on its own is not enough: without a runId the handler should use
// the latest-benchmarks query and never touch the exact-run query.
it('ignores exactRun without a runId (falls back to latest)', async () => {
  mockGetLatestBenchmarks.mockResolvedValueOnce([]);

  const url = '/api/v1/benchmarks?model=DeepSeek-R1-0528&exactRun=true';
  const response = await GET(req(url));

  expect(response.status).toBe(200);
  expect(mockGetLatestBenchmarks).toHaveBeenCalled();
  expect(mockGetBenchmarksForRun).not.toHaveBeenCalled();
});

it('returns 500 when query throws', async () => {
mockGetLatestBenchmarks.mockRejectedValueOnce(new Error('DB down'));

Expand Down
23 changes: 21 additions & 2 deletions packages/app/src/app/api/v1/benchmarks/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ import { type NextRequest, NextResponse } from 'next/server';
import { DISPLAY_MODEL_TO_DB } from '@semianalysisai/inferencex-constants';
import { FIXTURES_MODE, JSON_MODE, getDb } from '@semianalysisai/inferencex-db/connection';
import * as jsonProvider from '@semianalysisai/inferencex-db/json-provider';
import { getLatestBenchmarks } from '@semianalysisai/inferencex-db/queries/benchmarks';
import {
getBenchmarksForRun,
getLatestBenchmarks,
} from '@semianalysisai/inferencex-db/queries/benchmarks';

import { cachedJson, cachedQuery } from '@/lib/api-cache';
import { loadFixture } from '@/lib/test-fixtures';
Expand All @@ -20,6 +23,17 @@ const getCachedBenchmarks = cachedQuery(
{ blobOnly: true },
);

// Cached lookup of a single run's results (GPU comparison of individual
// same-day runs). It uses its own 'benchmarks-run' key prefix, intended to keep
// these entries from colliding with the latest/as-of benchmark query.
const getCachedBenchmarksForRun = cachedQuery(
  (dbModelKeys: string[], runId: string) =>
    // JSON mode reads from the JSON provider; otherwise query the database.
    JSON_MODE
      ? Promise.resolve(jsonProvider.getBenchmarksForRun(dbModelKeys, runId))
      : getBenchmarksForRun(getDb(), dbModelKeys, runId),
  'benchmarks-run',
  { blobOnly: true },
);

export async function GET(request: NextRequest) {
const params = request.nextUrl.searchParams;
const model = params.get('model') ?? '';
Expand All @@ -28,14 +42,19 @@ export async function GET(request: NextRequest) {
// Numeric GitHub run id only — anything else is ignored (treated as "latest").
const runIdParam = params.get('runId');
const runId = runIdParam && /^\d+$/u.test(runIdParam) ? runIdParam : undefined;
// exactRun=true → return exactly this run's results (GPU comparison of same-day runs).
const exactRun = params.get('exactRun') === 'true';
const dbModelKeys = DISPLAY_MODEL_TO_DB[model];
if (!dbModelKeys || dbModelKeys.length === 0) {
return NextResponse.json({ error: 'Unknown model' }, { status: 400 });
}
if (FIXTURES_MODE) return cachedJson(loadFixture('benchmarks'));

try {
const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId);
const rows =
exactRun && runId
? await getCachedBenchmarksForRun(dbModelKeys, runId)
: await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId);
return cachedJson(rows);
} catch (error) {
console.error('Error fetching benchmarks:', error);
Expand Down
31 changes: 23 additions & 8 deletions packages/app/src/app/api/v1/workflow-info/route.test.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
import { describe, expect, it, vi, beforeEach } from 'vitest';

const { mockGetWorkflowRunsByDate, mockGetChangelogByDate, mockGetDateConfigs, mockGetDb } =
vi.hoisted(() => ({
mockGetWorkflowRunsByDate: vi.fn(),
mockGetChangelogByDate: vi.fn(),
mockGetDateConfigs: vi.fn(),
mockGetDb: vi.fn(() => 'mock-sql'),
}));
const {
mockGetWorkflowRunsByDate,
mockGetChangelogByDate,
mockGetDateConfigs,
mockGetRunConfigsByDate,
mockGetDb,
} = vi.hoisted(() => ({
mockGetWorkflowRunsByDate: vi.fn(),
mockGetChangelogByDate: vi.fn(),
mockGetDateConfigs: vi.fn(),
mockGetRunConfigsByDate: vi.fn(),
mockGetDb: vi.fn(() => 'mock-sql'),
}));

vi.mock('@semianalysisai/inferencex-db/connection', () => ({
getDb: mockGetDb,
Expand All @@ -18,6 +24,7 @@ vi.mock('@semianalysisai/inferencex-db/queries/workflow-info', () => ({
getWorkflowRunsByDate: mockGetWorkflowRunsByDate,
getChangelogByDate: mockGetChangelogByDate,
getDateConfigs: mockGetDateConfigs,
getRunConfigsByDate: mockGetRunConfigsByDate,
}));

vi.mock('@/lib/api-cache', () => ({
Expand Down Expand Up @@ -60,9 +67,13 @@ describe('GET /api/v1/workflow-info', () => {
const mockRuns = [{ id: 1, status: 'completed' }];
const mockChangelogs = [{ version: '1.0', changes: 'Initial' }];
const mockConfigs = [{ model: 'dsr1', gpu: 'h200' }];
const mockRunConfigs = [
{ github_run_id: 1, model: 'dsr1', hardware: 'h200', framework: 'vllm' },
];
mockGetWorkflowRunsByDate.mockResolvedValueOnce(mockRuns);
mockGetChangelogByDate.mockResolvedValueOnce(mockChangelogs);
mockGetDateConfigs.mockResolvedValueOnce(mockConfigs);
mockGetRunConfigsByDate.mockResolvedValueOnce(mockRunConfigs);

const res = await GET(req('/api/v1/workflow-info?date=2026-03-01'));
expect(res.status).toBe(200);
Expand All @@ -71,28 +82,32 @@ describe('GET /api/v1/workflow-info', () => {
runs: mockRuns,
changelogs: mockChangelogs,
configs: mockConfigs,
runConfigs: mockRunConfigs,
});
expect(mockGetWorkflowRunsByDate).toHaveBeenCalledWith('mock-sql', '2026-03-01');
expect(mockGetChangelogByDate).toHaveBeenCalledWith('mock-sql', '2026-03-01');
expect(mockGetDateConfigs).toHaveBeenCalledWith('mock-sql', '2026-03-01');
expect(mockGetRunConfigsByDate).toHaveBeenCalledWith('mock-sql', '2026-03-01');
});

it('accepts empty date param (returns all)', async () => {
mockGetWorkflowRunsByDate.mockResolvedValueOnce([]);
mockGetChangelogByDate.mockResolvedValueOnce([]);
mockGetDateConfigs.mockResolvedValueOnce([]);
mockGetRunConfigsByDate.mockResolvedValueOnce([]);

const res = await GET(req('/api/v1/workflow-info'));
expect(res.status).toBe(200);
const body = await res.json();
expect(body).toEqual({ runs: [], changelogs: [], configs: [] });
expect(body).toEqual({ runs: [], changelogs: [], configs: [], runConfigs: [] });
expect(mockGetWorkflowRunsByDate).toHaveBeenCalledWith('mock-sql', '');
});

it('returns 500 when any query throws', async () => {
mockGetWorkflowRunsByDate.mockRejectedValueOnce(new Error('Timeout'));
mockGetChangelogByDate.mockResolvedValueOnce([]);
mockGetDateConfigs.mockResolvedValueOnce([]);
mockGetRunConfigsByDate.mockResolvedValueOnce([]);

const res = await GET(req('/api/v1/workflow-info?date=2026-03-01'));
expect(res.status).toBe(500);
Expand Down
7 changes: 5 additions & 2 deletions packages/app/src/app/api/v1/workflow-info/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import * as jsonProvider from '@semianalysisai/inferencex-db/json-provider';
import {
getChangelogByDate,
getDateConfigs,
getRunConfigsByDate,
getWorkflowRunsByDate,
} from '@semianalysisai/inferencex-db/queries/workflow-info';

Expand All @@ -19,15 +20,17 @@ const getCachedWorkflowInfo = cachedQuery(async (date: string) => {
runs: jsonProvider.getWorkflowRunsByDate(date),
changelogs: jsonProvider.getChangelogByDate(date),
configs: jsonProvider.getDateConfigs(date),
runConfigs: jsonProvider.getRunConfigsByDate(date),
};
}
const sql = getDb();
const [runs, changelogs, configs] = await Promise.all([
const [runs, changelogs, configs, runConfigs] = await Promise.all([
getWorkflowRunsByDate(sql, date),
getChangelogByDate(sql, date),
getDateConfigs(sql, date),
getRunConfigsByDate(sql, date),
]);
return { runs, changelogs, configs };
return { runs, changelogs, configs, runConfigs };
}, 'workflow-info');

export async function GET(request: NextRequest) {
Expand Down
13 changes: 7 additions & 6 deletions packages/app/src/components/inference/InferenceContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import {
type ReactNode,
type SetStateAction,
createContext,
useCallback,
useContext,
Expand Down Expand Up @@ -57,6 +58,7 @@ import {
import { filterRunsByModel, getDisplayLabel } from '@/lib/utils';

import { useChartData } from './hooks/useChartData';
import { resolveComparisonEntries } from './utils/comparisonEntry';

/** @internal Exported for test provider wrapping only. */
export const InferenceContext = createContext<InferenceChartContextType | undefined>(undefined);
Expand Down Expand Up @@ -416,7 +418,10 @@ export function InferenceProvider({
[setSelectedGPUs, clearPresetOnChange],
);
const setSelectedDatesAndClear = useCallback(
(v: string[]) => {
// Accept a React state updater (value OR function) so callers adding several
// dates/runs in quick succession can use the functional form and avoid the
// stale-closure race where each click overwrites the last.
(v: SetStateAction<string[]>) => {
setSelectedDates(v);
clearPresetOnChange();
},
Expand Down Expand Up @@ -564,11 +569,7 @@ export function InferenceProvider({
);

const allDateIds = useMemo(() => {
const dates: string[] = [];
if (selectedDateRange.startDate && selectedDateRange.endDate) {
dates.push(selectedDateRange.startDate, selectedDateRange.endDate);
}
dates.push(...selectedDates);
const dates = resolveComparisonEntries(selectedDates, selectedDateRange);
const allIds = new Set<string>();
selectedGPUs.forEach((gpu) => {
dates.forEach((date) => allIds.add(`${date}_${gpu}`));
Expand Down
27 changes: 18 additions & 9 deletions packages/app/src/components/inference/hooks/useChartData.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ import type {
YAxisMetricKey,
} from '@/components/inference/types';
import { filterDataByCostLimit } from '@/components/inference/utils';
import {
parseComparisonEntry,
resolveComparisonEntries,
} from '@/components/inference/utils/comparisonEntry';
import { useBenchmarks, benchmarkQueryOptions } from '@/hooks/api/use-benchmarks';
import {
GPU_ALIAS_TO_CANONICAL,
Expand All @@ -31,12 +35,11 @@ export function buildComparisonDates(
selectedRunDate: string | undefined,
): string[] {
if (selectedGPUs.length === 0) return [];
const dates: string[] = [];
if (selectedDateRange.startDate && selectedDateRange.endDate) {
dates.push(selectedDateRange.startDate, selectedDateRange.endDate);
}
dates.push(...selectedDates);
return [...new Set(dates.filter((d) => d !== selectedRunDate))];
// Range endpoints + individually-added dates/runs (redundant same-day range
// endpoints dropped), minus the main run date which the primary query covers.
return resolveComparisonEntries(selectedDates, selectedDateRange).filter(
(d) => d !== selectedRunDate,
);
}

/** Filter data by GPU key, resolving aliases to canonical keys. */
Expand Down Expand Up @@ -116,10 +119,16 @@ export function useChartData(
[selectedGPUs, selectedDates, selectedDateRange, selectedRunDate],
);

// Each comparison entry is either a plain date (latest run that day, exact-date
// query) or a specific run encoded as `date~r<id>~<i>of<n>` (exact-run query) so
// multiple same-day runs can be compared as distinct series.
const comparisonQueries = useQueries({
queries: comparisonDates.map((date) =>
benchmarkQueryOptions(selectedModel, date, enabled, true),
),
queries: comparisonDates.map((entry) => {
const parsed = parseComparisonEntry(entry);
return parsed.runId
? benchmarkQueryOptions(selectedModel, '', enabled, false, parsed.runId, true)
: benchmarkQueryOptions(selectedModel, entry, enabled, true);
}),
});

const comparisonLoading = comparisonQueries.some((q) => q.isLoading);
Expand Down
11 changes: 10 additions & 1 deletion packages/app/src/components/inference/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,14 @@ export interface ScatterGraphProps {
* playback).
*/
niceAxes?: boolean;
/**
* Stable run numbering (entry string `date~rRunId` → 1-based number) shared with
* the comparison changelog so legend labels match it exactly. Numbers index ALL
* of a date's runs (not just the ones on the chart), so a removed run leaves a
* gap that lines up with the changelog's still-listed "Add to chart" run. When
* omitted, GPUGraph falls back to gap-free numbering of the on-chart series.
*/
runNumbering?: Map<string, number>;
}
/**
* @file types.ts
Expand Down Expand Up @@ -639,7 +647,8 @@ export interface InferenceChartContextType {
setSelectedGPUs: (gpus: string[]) => void;
availableGPUs: { value: string; label: string }[];
selectedDates: string[];
setSelectedDates: (dates: string[]) => void;
/** Accepts a value or a state-updater fn (for safe rapid successive adds). */
setSelectedDates: (dates: string[] | ((prev: string[]) => string[])) => void;
selectedDateRange: { startDate: string; endDate: string };
setSelectedDateRange: (dateRange: { startDate: string; endDate: string }) => void;
userCosts: Record<string, number | undefined> | null;
Expand Down
Loading
Loading