Skip to content

Commit 13038d7

Browse files
authored
feat(inference): time-travel chart data to the selected run (as-of-run filtering) (#459)
1 parent e7ee836 commit 13038d7

10 files changed

Lines changed: 346 additions & 35 deletions

File tree

packages/app/src/app/api/v1/benchmarks/route.test.ts

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ describe('GET /api/v1/benchmarks', () => {
5959
['dsr1'],
6060
undefined,
6161
undefined,
62+
undefined,
6263
);
6364
});
6465

@@ -72,6 +73,7 @@ describe('GET /api/v1/benchmarks', () => {
7273
['dsr1'],
7374
'2026-03-01',
7475
undefined,
76+
undefined,
7577
);
7678
});
7779

@@ -82,7 +84,45 @@ describe('GET /api/v1/benchmarks', () => {
8284
req('/api/v1/benchmarks?model=DeepSeek-R1-0528&date=2026-03-01&exact=true'),
8385
);
8486
expect(res.status).toBe(200);
85-
expect(mockGetLatestBenchmarks).toHaveBeenCalledWith('mock-sql', ['dsr1'], '2026-03-01', true);
87+
expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
88+
'mock-sql',
89+
['dsr1'],
90+
'2026-03-01',
91+
true,
92+
undefined,
93+
);
94+
});
95+
96+
it('passes a numeric runId through to the query', async () => {
97+
mockGetLatestBenchmarks.mockResolvedValueOnce([]);
98+
99+
const res = await GET(
100+
req('/api/v1/benchmarks?model=DeepSeek-R1-0528&date=2026-03-01&runId=27489075807'),
101+
);
102+
expect(res.status).toBe(200);
103+
expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
104+
'mock-sql',
105+
['dsr1'],
106+
'2026-03-01',
107+
undefined,
108+
'27489075807',
109+
);
110+
});
111+
112+
it('ignores a non-numeric runId (treated as latest)', async () => {
113+
mockGetLatestBenchmarks.mockResolvedValueOnce([]);
114+
115+
const res = await GET(
116+
req('/api/v1/benchmarks?model=DeepSeek-R1-0528&date=2026-03-01&runId=not-a-run'),
117+
);
118+
expect(res.status).toBe(200);
119+
expect(mockGetLatestBenchmarks).toHaveBeenCalledWith(
120+
'mock-sql',
121+
['dsr1'],
122+
'2026-03-01',
123+
undefined,
124+
undefined,
125+
);
86126
});
87127

88128
it('returns 500 when query throws', async () => {

packages/app/src/app/api/v1/benchmarks/route.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ import { loadFixture } from '@/lib/test-fixtures';
1111
export const dynamic = 'force-dynamic';
1212

1313
const getCachedBenchmarks = cachedQuery(
14-
(dbModelKeys: string[], date?: string, exact?: boolean) => {
14+
(dbModelKeys: string[], date?: string, exact?: boolean, runId?: string) => {
1515
if (JSON_MODE)
16-
return Promise.resolve(jsonProvider.getLatestBenchmarks(dbModelKeys, date, exact));
17-
return getLatestBenchmarks(getDb(), dbModelKeys, date, exact);
16+
return Promise.resolve(jsonProvider.getLatestBenchmarks(dbModelKeys, date, exact, runId));
17+
return getLatestBenchmarks(getDb(), dbModelKeys, date, exact, runId);
1818
},
1919
'benchmarks',
2020
{ blobOnly: true },
@@ -25,14 +25,17 @@ export async function GET(request: NextRequest) {
2525
const model = params.get('model') ?? '';
2626
const date = params.get('date') ?? undefined;
2727
const exact = params.get('exact') === 'true';
28+
// Numeric GitHub run id only — anything else is ignored (treated as "latest").
29+
const runIdParam = params.get('runId');
30+
const runId = runIdParam && /^\d+$/u.test(runIdParam) ? runIdParam : undefined;
2831
const dbModelKeys = DISPLAY_MODEL_TO_DB[model];
2932
if (!dbModelKeys || dbModelKeys.length === 0) {
3033
return NextResponse.json({ error: 'Unknown model' }, { status: 400 });
3134
}
3235
if (FIXTURES_MODE) return cachedJson(loadFixture('benchmarks'));
3336

3437
try {
35-
const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined);
38+
const rows = await getCachedBenchmarks(dbModelKeys, date, exact || undefined, runId);
3639
return cachedJson(rows);
3740
} catch (error) {
3841
console.error('Error fetching benchmarks:', error);

packages/app/src/components/inference/InferenceContext.tsx

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,43 @@ export function InferenceProvider({
202202
// ── Data fetching (gated by isActive) ──────────────────────────────────────
203203
const latestDate = availableDates.length > 0 ? availableDates.at(-1) : undefined;
204204

205+
// Runs available for the current model selection, and which one is selected.
206+
// Computed here (above useChartData) so the chart can query "as of" the selected
207+
// run. Re-exposed on the context value below.
208+
const modelPrefixes = useMemo(
209+
() =>
210+
Object.entries(MODEL_PREFIX_MAPPING)
211+
.filter(([, model]) => model === selectedModel)
212+
.map(([prefix]) => prefix),
213+
[selectedModel],
214+
);
215+
216+
const filteredAvailableRuns = useMemo(
217+
() => filterRunsByModel(availableRuns, modelPrefixes, [...effectivePrecisions]),
218+
[availableRuns, modelPrefixes, effectivePrecisions],
219+
);
220+
221+
const effectiveSelectedRunId = useMemo(() => {
222+
if (!filteredAvailableRuns) return selectedRunId;
223+
const filteredRunIds = Object.keys(filteredAvailableRuns);
224+
if (filteredRunIds.length === 0 || filteredRunIds.includes(selectedRunId)) return selectedRunId;
225+
return filteredRunIds.reduce((max, id) => (id > max ? id : max), filteredRunIds[0]);
226+
}, [filteredAvailableRuns, selectedRunId]);
227+
228+
// The latest run for this model on the selected date. GitHub run ids increase
229+
monotonically with time, so the lexicographically-greatest id is the newest run — valid only while all ids have the same digit count (as strings, '999' compares greater than '1000').
230+
const latestRunIdForModel = useMemo(() => {
231+
const ids = filteredAvailableRuns ? Object.keys(filteredAvailableRuns) : [];
232+
return ids.length > 0 ? ids.reduce((max, id) => (id > max ? id : max), ids[0]) : '';
233+
}, [filteredAvailableRuns]);
234+
235+
// Only constrain the query when an earlier-than-latest run is selected; otherwise
236+
// the chart shows the full latest view (and reuses the materialized-view fast path).
237+
const asOfRunId =
238+
effectiveSelectedRunId && latestRunIdForModel && effectiveSelectedRunId !== latestRunIdForModel
239+
? effectiveSelectedRunId
240+
: undefined;
241+
205242
const {
206243
graphs,
207244
loading: chartDataLoading,
@@ -223,6 +260,7 @@ export function InferenceProvider({
223260
isActive,
224261
latestDate,
225262
compareGpuPair ?? null,
263+
asOfRunId,
226264
);
227265

228266
// For GPU comparison date picker — use shared availability data from global filters
@@ -699,14 +737,6 @@ export function InferenceProvider({
699737
setUserPowers((prev) => (prev === null ? prev : null));
700738
}, [selectedModel, effectiveSequence, effectivePrecisions, selectedYAxisMetric]);
701739

702-
const modelPrefixes = useMemo(
703-
() =>
704-
Object.entries(MODEL_PREFIX_MAPPING)
705-
.filter(([, model]) => model === selectedModel)
706-
.map(([prefix]) => prefix),
707-
[selectedModel],
708-
);
709-
710740
// ── Debounced GPU selection tracking ─────────────────────────────────────
711741
// Fire after 3s of no changes so we capture the "settled" selection.
712742
// Skip the first render (initial data load) to avoid noise.
@@ -922,19 +952,9 @@ export function InferenceProvider({
922952
}, [applyPreset]);
923953

924954
// ── Filtered runs ─────────────────────────────────────────────────────────
925-
926-
const filteredAvailableRuns = useMemo(
927-
() => filterRunsByModel(availableRuns, modelPrefixes, [...effectivePrecisions]),
928-
[availableRuns, modelPrefixes, effectivePrecisions],
929-
);
930-
931-
const effectiveSelectedRunId = useMemo(() => {
932-
if (!filteredAvailableRuns) return selectedRunId;
933-
const filteredRunIds = Object.keys(filteredAvailableRuns);
934-
if (filteredRunIds.length === 0 || filteredRunIds.includes(selectedRunId)) return selectedRunId;
935-
return filteredRunIds.reduce((max, id) => (id > max ? id : max), filteredRunIds[0]);
936-
}, [filteredAvailableRuns, selectedRunId]);
937-
955+
// filteredAvailableRuns / effectiveSelectedRunId are computed above the data
956+
// fetch (so the chart can query "as of" the selected run).
957+
//
938958
// NOTE: We intentionally do NOT sync effectiveSelectedRunId back to
939959
// GlobalFilterContext (setSelectedRunId). That would cause a full tree
940960
// re-render on every precision change because filteredAvailableRuns

packages/app/src/components/inference/hooks/useChartData.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,20 +85,30 @@ export function useChartData(
8585
latestAvailableDate?: string,
8686
/** When set, only series for these two registry GPU keys are shown (compare pages). */
8787
compareGpuPair?: readonly [string, string] | null,
88+
/**
89+
* GitHub run id for the "as of run" view. Set only when an earlier-than-latest
90+
* run is selected; the chart then shows the data as it stood at that run.
91+
*/
92+
asOfRunId?: string,
8893
) {
8994
// When the selected date is the latest available, use '' (empty string) to match
9095
// the initial no-date query key, reusing the eagerly-fetched benchmarks from the
9196
// materialized view instead of firing a redundant second fetch with identical data.
92-
const queryDate =
93-
selectedRunDate && latestAvailableDate && selectedRunDate === latestAvailableDate
97+
//
98+
// The '' shortcut hits the materialized view, which has no run-level filter, so it
99+
// is only valid for the latest run. When an earlier run is selected (asOfRunId set)
100+
// we must query the date-filtered path so the run cutoff applies.
101+
const queryDate = asOfRunId
102+
? (selectedRunDate ?? '')
103+
: selectedRunDate && latestAvailableDate && selectedRunDate === latestAvailableDate
94104
? ''
95105
: selectedRunDate;
96106

97107
const {
98108
data: allRows,
99109
isLoading: queryLoading,
100110
error: queryError,
101-
} = useBenchmarks(selectedModel, queryDate, enabled);
111+
} = useBenchmarks(selectedModel, queryDate, enabled, asOfRunId);
102112

103113
// GPU comparison: fetch data for each additional comparison date
104114
const comparisonDates = useMemo(

packages/app/src/hooks/api/use-benchmarks.test.ts

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,41 @@ import { benchmarkQueryOptions } from '@/hooks/api/use-benchmarks';
55
describe('benchmarkQueryOptions', () => {
66
it('builds query key from model and date', () => {
77
const opts = benchmarkQueryOptions('DeepSeek-R1-0528', '2026-03-01');
8-
expect(opts.queryKey).toEqual(['benchmarks', 'DeepSeek-R1-0528', '2026-03-01', 'latest']);
8+
expect(opts.queryKey).toEqual([
9+
'benchmarks',
10+
'DeepSeek-R1-0528',
11+
'2026-03-01',
12+
'latest',
13+
'all',
14+
]);
915
});
1016

1117
it('builds exact query key when exact=true', () => {
1218
const opts = benchmarkQueryOptions('DeepSeek-R1-0528', '2026-03-01', true, true);
13-
expect(opts.queryKey).toEqual(['benchmarks', 'DeepSeek-R1-0528', '2026-03-01', 'exact']);
19+
expect(opts.queryKey).toEqual(['benchmarks', 'DeepSeek-R1-0528', '2026-03-01', 'exact', 'all']);
20+
});
21+
22+
it('includes the runId in the query key for the as-of-run view', () => {
23+
const opts = benchmarkQueryOptions(
24+
'DeepSeek-R1-0528',
25+
'2026-03-01',
26+
true,
27+
false,
28+
'27489075807',
29+
);
30+
expect(opts.queryKey).toEqual([
31+
'benchmarks',
32+
'DeepSeek-R1-0528',
33+
'2026-03-01',
34+
'latest',
35+
'27489075807',
36+
]);
37+
});
38+
39+
it('produces distinct keys for different runIds (no cache collision)', () => {
40+
const a = benchmarkQueryOptions('m', '2026-03-01', true, false, '100');
41+
const b = benchmarkQueryOptions('m', '2026-03-01', true, false, '101');
42+
expect(a.queryKey).not.toEqual(b.queryKey);
1443
});
1544

1645
it('produces distinct keys for different models', () => {

packages/app/src/hooks/api/use-benchmarks.ts

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,17 @@ export function benchmarkQueryOptions(
88
date: string,
99
enabled = true,
1010
exact?: boolean,
11+
/** GitHub run id for the "as of run" view (main chart only). */
12+
runId?: string,
1113
) {
1214
return {
13-
queryKey: ['benchmarks', model, date, exact ? 'exact' : 'latest'] as const,
14-
queryFn: ({ signal }: { signal: AbortSignal }) => fetchBenchmarks(model, date, exact, signal),
15+
queryKey: ['benchmarks', model, date, exact ? 'exact' : 'latest', runId ?? 'all'] as const,
16+
queryFn: ({ signal }: { signal: AbortSignal }) =>
17+
fetchBenchmarks(model, date, exact, signal, runId),
1518
enabled: enabled && Boolean(model),
1619
};
1720
}
1821

19-
export function useBenchmarks(model: string, date?: string, enabled = true) {
20-
return useQuery(benchmarkQueryOptions(model, date ?? 'latest', enabled));
22+
export function useBenchmarks(model: string, date?: string, enabled = true, runId?: string) {
23+
return useQuery(benchmarkQueryOptions(model, date ?? 'latest', enabled, undefined, runId));
2124
}

packages/app/src/lib/api.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,12 @@ export function fetchBenchmarks(
126126
date?: string,
127127
exact?: boolean,
128128
signal?: AbortSignal,
129+
runId?: string,
129130
) {
130131
const params = new URLSearchParams({ model });
131132
if (date) params.set('date', date);
132133
if (exact) params.set('exact', 'true');
134+
if (runId) params.set('runId', runId);
133135
return fetchJson<BenchmarkRow[]>(`/api/v1/benchmarks?${params}`, signal);
134136
}
135137

0 commit comments

Comments
 (0)