SemiAnalysisAI · adibarra · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026 · Jun 15, 2026
@@ -1,7 +1,8 @@
 import { describe, expect, it, vi, beforeEach } from 'vitest';
 
-const { mockGetLatestBenchmarks, mockGetDb } = vi.hoisted(() => ({
+const { mockGetLatestBenchmarks, mockGetBenchmarksForRun, mockGetDb } = vi.hoisted(() => ({
   mockGetLatestBenchmarks: vi.fn(),
+  mockGetBenchmarksForRun: vi.fn(),
   mockGetDb: vi.fn(() => 'mock-sql'),
 }));
 
@@ -13,6 +14,7 @@ vi.mock('@semianalysisai/inferencex-db/connection', () => ({
 
 vi.mock('@semianalysisai/inferencex-db/queries/benchmarks', () => ({
   getLatestBenchmarks: mockGetLatestBenchmarks,
+  getBenchmarksForRun: mockGetBenchmarksForRun,
 }));
 
 vi.mock('@/lib/api-cache', () => ({
@@ -125,6 +127,28 @@ describe('GET /api/v1/benchmarks', () => {
     );
   });
 
+  it('routes exactRun=true + runId to the exact-run query', async () => {
+    const runRows = [{ id: 1, hardware: 'mi300x' }];
+    mockGetBenchmarksForRun.mockResolvedValueOnce(runRows);
+
+    const res = await GET(
+      req('/api/v1/benchmarks?model=DeepSeek-R1-0528&runId=27489075807&exactRun=true'),
+    );
+    expect(res.status).toBe(200);
+    expect(await res.json()).toEqual(runRows); // response body is the exact-run rows, unmodified
+    expect(mockGetBenchmarksForRun).toHaveBeenCalledWith('mock-sql', ['dsr1'], '27489075807'); // display model resolved to its DB key(s); runId forwarded as a string
+    expect(mockGetLatestBenchmarks).not.toHaveBeenCalled(); // the latest/as-of query must not run on this path
+  });
+
+  it('ignores exactRun without a runId (falls back to latest)', async () => {
+    mockGetLatestBenchmarks.mockResolvedValueOnce([]);
+
+    const res = await GET(req('/api/v1/benchmarks?model=DeepSeek-R1-0528&exactRun=true')); // note: no runId param
+    expect(res.status).toBe(200);
+    expect(mockGetBenchmarksForRun).not.toHaveBeenCalled(); // exactRun alone must not select the exact-run query
+    expect(mockGetLatestBenchmarks).toHaveBeenCalled(); // request is served by the latest query instead
+  });
+
   it('returns 500 when query throws', async () => {
     mockGetLatestBenchmarks.mockRejectedValueOnce(new Error('DB down'));
 

@@ -3,7 +3,10 @@ import { type NextRequest, NextResponse } from 'next/server';
 import { DISPLAY_MODEL_TO_DB } from '@semianalysisai/inferencex-constants';
 import { FIXTURES_MODE, JSON_MODE, getDb } from '@semianalysisai/inferencex-db/connection';
 import * as jsonProvider from '@semianalysisai/inferencex-db/json-provider';
-import { getLatestBenchmarks } from '@semianalysisai/inferencex-db/queries/benchmarks';
+import {
+  getBenchmarksForRun,
+  getLatestBenchmarks,
+} from '@semianalysisai/inferencex-db/queries/benchmarks';
 
 import { cachedJson, cachedQuery } from '@/lib/api-cache';
 import { loadFixture } from '@/lib/test-fixtures';
@@ -20,6 +23,17 @@ const getCachedBenchmarks = cachedQuery(
   { blobOnly: true },
 );
 
+// Cached fetch of exactly one run's results (GPU comparison of individual same-day runs).
+// Uses the distinct 'benchmarks-run' key prefix so it never collides with the latest/as-of query.
+const getCachedBenchmarksForRun = cachedQuery(
+  (dbModelKeys: string[], runId: string) => {
+    if (JSON_MODE) return Promise.resolve(jsonProvider.getBenchmarksForRun(dbModelKeys, runId)); // JSON_MODE: read from the JSON provider, wrapped in a promise
+    return getBenchmarksForRun(getDb(), dbModelKeys, runId); // otherwise run the query with the getDb() handle
+  },
+  'benchmarks-run',
+  { blobOnly: true },
+);
+
 export async function GET(request: NextRequest) {
   const params = request.nextUrl.searchParams;
   const model = params.get('model') ?? '';
@@ -28,14 +42,19 @@ export async function GET(request: NextRequest) {
   // Numeric GitHub run id only — anything else is ignored (treated as "latest").
   const runIdParam = params.get('runId');
   const runId = runIdParam && /^\d+$/u.test(runIdParam) ? runIdParam : undefined;
+  // exactRun=true → return exactly this run's results (GPU comparison of same-day runs).
+  const exactRun = params.get('exactRun') === 'true';
   const dbModelKeys = DISPLAY_MODEL_TO_DB[model];
   if (!dbModelKeys || dbModelKeys.length === 0) {
     return NextResponse.json({ error: 'Unknown model' }, { status: 400 });
   }
   if (FIXTURES_MODE) return cachedJson(loadFixture('benchmarks'));
 
   try {
-    const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId);
+    const rows =
+      exactRun && runId
+        ? await getCachedBenchmarksForRun(dbModelKeys, runId)
+        : await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId);
     return cachedJson(rows);
   } catch (error) {
     console.error('Error fetching benchmarks:', error);

@@ -1,12 +1,18 @@
 import { describe, expect, it, vi, beforeEach } from 'vitest';
 
-const { mockGetWorkflowRunsByDate, mockGetChangelogByDate, mockGetDateConfigs, mockGetDb } =
-  vi.hoisted(() => ({
-    mockGetWorkflowRunsByDate: vi.fn(),
-    mockGetChangelogByDate: vi.fn(),
-    mockGetDateConfigs: vi.fn(),
-    mockGetDb: vi.fn(() => 'mock-sql'),
-  }));
+const {
+  mockGetWorkflowRunsByDate,
+  mockGetChangelogByDate,
+  mockGetDateConfigs,
+  mockGetRunConfigsByDate,
+  mockGetDb,
+} = vi.hoisted(() => ({
+  mockGetWorkflowRunsByDate: vi.fn(),
+  mockGetChangelogByDate: vi.fn(),
+  mockGetDateConfigs: vi.fn(),
+  mockGetRunConfigsByDate: vi.fn(),
+  mockGetDb: vi.fn(() => 'mock-sql'),
+}));
 
 vi.mock('@semianalysisai/inferencex-db/connection', () => ({
   getDb: mockGetDb,
@@ -18,6 +24,7 @@ vi.mock('@semianalysisai/inferencex-db/queries/workflow-info', () => ({
   getWorkflowRunsByDate: mockGetWorkflowRunsByDate,
   getChangelogByDate: mockGetChangelogByDate,
   getDateConfigs: mockGetDateConfigs,
+  getRunConfigsByDate: mockGetRunConfigsByDate,
 }));
 
 vi.mock('@/lib/api-cache', () => ({
@@ -60,9 +67,13 @@ describe('GET /api/v1/workflow-info', () => {
     const mockRuns = [{ id: 1, status: 'completed' }];
     const mockChangelogs = [{ version: '1.0', changes: 'Initial' }];
     const mockConfigs = [{ model: 'dsr1', gpu: 'h200' }];
+    const mockRunConfigs = [
+      { github_run_id: 1, model: 'dsr1', hardware: 'h200', framework: 'vllm' },
+    ];
     mockGetWorkflowRunsByDate.mockResolvedValueOnce(mockRuns);
     mockGetChangelogByDate.mockResolvedValueOnce(mockChangelogs);
     mockGetDateConfigs.mockResolvedValueOnce(mockConfigs);
+    mockGetRunConfigsByDate.mockResolvedValueOnce(mockRunConfigs);
 
     const res = await GET(req('/api/v1/workflow-info?date=2026-03-01'));
     expect(res.status).toBe(200);
@@ -71,28 +82,32 @@ describe('GET /api/v1/workflow-info', () => {
       runs: mockRuns,
       changelogs: mockChangelogs,
       configs: mockConfigs,
+      runConfigs: mockRunConfigs,
     });
     expect(mockGetWorkflowRunsByDate).toHaveBeenCalledWith('mock-sql', '2026-03-01');
     expect(mockGetChangelogByDate).toHaveBeenCalledWith('mock-sql', '2026-03-01');
     expect(mockGetDateConfigs).toHaveBeenCalledWith('mock-sql', '2026-03-01');
+    expect(mockGetRunConfigsByDate).toHaveBeenCalledWith('mock-sql', '2026-03-01');
   });
 
   it('accepts empty date param (returns all)', async () => {
     mockGetWorkflowRunsByDate.mockResolvedValueOnce([]);
     mockGetChangelogByDate.mockResolvedValueOnce([]);
     mockGetDateConfigs.mockResolvedValueOnce([]);
+    mockGetRunConfigsByDate.mockResolvedValueOnce([]);
 
     const res = await GET(req('/api/v1/workflow-info'));
     expect(res.status).toBe(200);
     const body = await res.json();
-    expect(body).toEqual({ runs: [], changelogs: [], configs: [] });
+    expect(body).toEqual({ runs: [], changelogs: [], configs: [], runConfigs: [] });
     expect(mockGetWorkflowRunsByDate).toHaveBeenCalledWith('mock-sql', ''); // omitted date param is forwarded as an empty string
   });
 
   it('returns 500 when any query throws', async () => {
     mockGetWorkflowRunsByDate.mockRejectedValueOnce(new Error('Timeout'));
     mockGetChangelogByDate.mockResolvedValueOnce([]);
     mockGetDateConfigs.mockResolvedValueOnce([]);
+    mockGetRunConfigsByDate.mockResolvedValueOnce([]);
 
     const res = await GET(req('/api/v1/workflow-info?date=2026-03-01'));
     expect(res.status).toBe(500);

@@ -5,6 +5,7 @@ import * as jsonProvider from '@semianalysisai/inferencex-db/json-provider';
 import {
   getChangelogByDate,
   getDateConfigs,
+  getRunConfigsByDate,
   getWorkflowRunsByDate,
 } from '@semianalysisai/inferencex-db/queries/workflow-info';
 
@@ -19,15 +20,17 @@ const getCachedWorkflowInfo = cachedQuery(async (date: string) => {
       runs: jsonProvider.getWorkflowRunsByDate(date),
       changelogs: jsonProvider.getChangelogByDate(date),
       configs: jsonProvider.getDateConfigs(date),
+      runConfigs: jsonProvider.getRunConfigsByDate(date),
     };
   }
   const sql = getDb();
-  const [runs, changelogs, configs] = await Promise.all([
+  const [runs, changelogs, configs, runConfigs] = await Promise.all([
     getWorkflowRunsByDate(sql, date),
     getChangelogByDate(sql, date),
     getDateConfigs(sql, date),
+    getRunConfigsByDate(sql, date),
   ]);
-  return { runs, changelogs, configs };
+  return { runs, changelogs, configs, runConfigs };
 }, 'workflow-info');
 
 export async function GET(request: NextRequest) {

@@ -2,6 +2,7 @@
 
 import {
   type ReactNode,
+  type SetStateAction,
   createContext,
   useCallback,
   useContext,
@@ -57,6 +58,7 @@ import {
 import { filterRunsByModel, getDisplayLabel } from '@/lib/utils';
 
 import { useChartData } from './hooks/useChartData';
+import { resolveComparisonEntries } from './utils/comparisonEntry';
 
 /** @internal Exported for test provider wrapping only. */
 export const InferenceContext = createContext<InferenceChartContextType | undefined>(undefined);
@@ -416,7 +418,10 @@ export function InferenceProvider({
     [setSelectedGPUs, clearPresetOnChange],
   );
   const setSelectedDatesAndClear = useCallback(
-    (v: string[]) => {
+    // Accept a React state updater (value OR function) so callers adding several
+    // dates/runs in quick succession can use the functional form and avoid the
+    // stale-closure race where each click overwrites the last.
+    (v: SetStateAction<string[]>) => {
       setSelectedDates(v);
       clearPresetOnChange();
     },
@@ -564,11 +569,7 @@ export function InferenceProvider({
   );
 
   const allDateIds = useMemo(() => {
-    const dates: string[] = [];
-    if (selectedDateRange.startDate && selectedDateRange.endDate) {
-      dates.push(selectedDateRange.startDate, selectedDateRange.endDate);
-    }
-    dates.push(...selectedDates);
+    const dates = resolveComparisonEntries(selectedDates, selectedDateRange);
     const allIds = new Set<string>();
     selectedGPUs.forEach((gpu) => {
       dates.forEach((date) => allIds.add(`${date}_${gpu}`));

@@ -13,6 +13,10 @@ import type {
   YAxisMetricKey,
 } from '@/components/inference/types';
 import { filterDataByCostLimit } from '@/components/inference/utils';
+import {
+  parseComparisonEntry,
+  resolveComparisonEntries,
+} from '@/components/inference/utils/comparisonEntry';
 import { useBenchmarks, benchmarkQueryOptions } from '@/hooks/api/use-benchmarks';
 import {
   GPU_ALIAS_TO_CANONICAL,
@@ -31,12 +35,11 @@ export function buildComparisonDates(
   selectedRunDate: string | undefined,
 ): string[] {
   if (selectedGPUs.length === 0) return [];
-  const dates: string[] = [];
-  if (selectedDateRange.startDate && selectedDateRange.endDate) {
-    dates.push(selectedDateRange.startDate, selectedDateRange.endDate);
-  }
-  dates.push(...selectedDates);
-  return [...new Set(dates.filter((d) => d !== selectedRunDate))];
+  // Range endpoints + individually-added dates/runs (redundant same-day range
+  // endpoints dropped), minus the main run date which the primary query covers.
+  return resolveComparisonEntries(selectedDates, selectedDateRange).filter(
+    (d) => d !== selectedRunDate,
+  );
 }
 
 /** Filter data by GPU key, resolving aliases to canonical keys. */
@@ -116,10 +119,16 @@ export function useChartData(
     [selectedGPUs, selectedDates, selectedDateRange, selectedRunDate],
   );
 
+  // Each comparison entry is either a plain date (latest run that day, exact-date
+  // query) or a specific run encoded as `date~r<id>~<i>of<n>` (exact-run query) so
+  // multiple same-day runs can be compared as distinct series.
   const comparisonQueries = useQueries({
-    queries: comparisonDates.map((date) =>
-      benchmarkQueryOptions(selectedModel, date, enabled, true),
-    ),
+    queries: comparisonDates.map((entry) => {
+      const parsed = parseComparisonEntry(entry);
+      return parsed.runId
+        ? benchmarkQueryOptions(selectedModel, '', enabled, false, parsed.runId, true)
+        : benchmarkQueryOptions(selectedModel, entry, enabled, true);
+    }),
   });
 
   const comparisonLoading = comparisonQueries.some((q) => q.isLoading);

@@ -481,6 +481,14 @@ export interface ScatterGraphProps {
    * playback).
    */
   niceAxes?: boolean;
+  /**
+   * Stable run numbering (entry string `date~rRunId` → 1-based number) shared with
+   * the comparison changelog so legend labels match it exactly. Numbers index ALL
+   * of a date's runs (not just the ones on the chart), so a removed run leaves a
+   * gap that lines up with the changelog's still-listed "Add to chart" run. When
+   * omitted, GPUGraph falls back to gap-free numbering of the on-chart series.
+   */
+  runNumbering?: Map<string, number>;
 }
 /**
  * @file types.ts
@@ -639,7 +647,8 @@ export interface InferenceChartContextType {
   setSelectedGPUs: (gpus: string[]) => void;
   availableGPUs: { value: string; label: string }[];
   selectedDates: string[];
-  setSelectedDates: (dates: string[]) => void;
+  /** Accepts a value or a state-updater fn (for safe rapid successive adds). */
+  setSelectedDates: (dates: string[] | ((prev: string[]) => string[])) => void;
   selectedDateRange: { startDate: string; endDate: string };
   setSelectedDateRange: (dateRange: { startDate: string; endDate: string }) => void;
   userCosts: Record<string, number | undefined> | null;