Skip to content

Commit 5e46bc3

Browse files
feat(environment): add benchmark env schema and end to end flow
1 parent 67c3038 commit 5e46bc3

26 files changed

Lines changed: 1357 additions & 82 deletions

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"admin:db:load-dump": "pnpm --filter *db db:load-dump --",
3838
"admin:db:migrate": "pnpm --filter *db db:migrate",
3939
"admin:db:apply-overrides": "pnpm --filter *db db:apply-overrides",
40+
"admin:db:backfill:envs": "pnpm --filter *db db:backfill:envs",
4041
"admin:db:reset": "pnpm --filter *db db:reset",
4142
"admin:db:verify": "pnpm --filter *db db:verify"
4243
},

packages/app/cypress/e2e/reproduce-drawer.cy.ts

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ describe('Reproduce drawer', () => {
1515
.should('have.length.greaterThan', 0);
1616
});
1717

18-
it('opens from the inference table Reproduce button and shows the three tabs', () => {
18+
it('opens from clicking an inference table row and shows the three tabs', () => {
1919
cy.get('[data-testid="inference-table-view-btn"]').first().click();
2020
cy.get('[data-testid="inference-results-table"]').should('be.visible');
21-
cy.get('[data-testid="inference-table-reproduce-btn"]').first().click();
21+
cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
2222

2323
cy.get('[data-testid="reproduce-drawer"]').should('be.visible');
2424
cy.contains('Reproduce this benchmark').should('be.visible');
@@ -29,7 +29,7 @@ describe('Reproduce drawer', () => {
2929

3030
it('exposes a copy button on every tab', () => {
3131
cy.get('[data-testid="inference-table-view-btn"]').first().click();
32-
cy.get('[data-testid="inference-table-reproduce-btn"]').first().click();
32+
cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
3333
cy.get('[data-testid="reproduce-drawer-copy"]').should('be.visible');
3434
cy.contains('button', 'Config JSON').click();
3535
cy.get('[data-testid="reproduce-drawer-copy"]').should('be.visible');
@@ -39,7 +39,7 @@ describe('Reproduce drawer', () => {
3939

4040
it('Config JSON tab shows config fields and excludes result metrics', () => {
4141
cy.get('[data-testid="inference-table-view-btn"]').first().click();
42-
cy.get('[data-testid="inference-table-reproduce-btn"]').first().click();
42+
cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
4343
cy.contains('button', 'Config JSON').click();
4444
cy.get('[data-testid="reproduce-drawer"]')
4545
.find('pre')
@@ -57,10 +57,34 @@ describe('Reproduce drawer', () => {
5757
});
5858
});
5959

60+
it('Environment tab renders structured rows including env-only fields with graceful fallback', () => {
  cy.get('[data-testid="inference-table-view-btn"]').first().click();
  cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
  cy.contains('button', 'Environment').click();
  // Core rows are always rendered. The values come from /api/v1/run-environment
  // when available; otherwise they show "(not recorded)" — we assert the
  // labels exist either way so a regression that drops a row is caught.
  const labels = [
    'GPU',
    'GPU SKU',
    'Framework',
    'Framework version',
    'Framework SHA',
    'Container image',
    'Driver',
    'CUDA',
    'PyTorch',
    'Python',
  ];
  for (const label of labels) {
    // Match the whole <dt> text rather than a substring: with a plain string,
    // 'GPU' is also satisfied by the 'GPU SKU' row and 'Framework' by
    // 'Framework version' / 'Framework SHA', so a dropped 'GPU' or 'Framework'
    // row would go unnoticed. The labels contain no regex metacharacters.
    const exactLabel = new RegExp(`^\\s*${label}\\s*$`);
    cy.get('[data-testid="reproduce-drawer"]').contains('dt', exactLabel).should('be.visible');
  }
});
83+
6084
it('Esc closes the drawer without changing the URL hash', () => {
6185
cy.get('[data-testid="inference-table-view-btn"]').first().click();
6286
cy.url().then((before) => {
63-
cy.get('[data-testid="inference-table-reproduce-btn"]').first().click();
87+
cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
6488
cy.get('[data-testid="reproduce-drawer"]').should('be.visible');
6589
cy.get('body').type('{esc}');
6690
cy.get('[data-testid="reproduce-drawer"]').should('not.exist');
@@ -72,7 +96,7 @@ describe('Reproduce drawer', () => {
7296
// Re-visit with the overlay query param. We do NOT assert which row is
7397
// rendered — we only assert the drawer can be opened from whatever points
7498
// appear for the official path on top of the overlay. The wiring is the
75-
// same code path: clicking a Reproduce control feeds the InferenceData
99+
// same code path: clicking an inference table row feeds the InferenceData
76100
// through to the drawer regardless of where the row originated.
77101
const candidateRunId = '15000000000';
78102
cy.visit(`/inference?unofficialrun=${candidateRunId}`);
@@ -82,7 +106,7 @@ describe('Reproduce drawer', () => {
82106
.should('have.length.greaterThan', 0);
83107
cy.get('[data-testid="inference-table-view-btn"]').first().click();
84108
cy.get('[data-testid="inference-results-table"]').should('be.visible');
85-
cy.get('[data-testid="inference-table-reproduce-btn"]').first().click();
109+
cy.get('[data-testid="inference-results-table"] tbody tr').first().click();
86110
cy.get('[data-testid="reproduce-drawer"]').should('be.visible');
87111
// Same Config JSON guarantee for the overlay path — the drawer renders
88112
// overlay points through the same `InferenceData` shape, so result-metric
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import { describe, expect, it, vi, beforeEach } from 'vitest';
2+
3+
const { mockGetEnvironment, mockGetDb } = vi.hoisted(() => ({
4+
mockGetEnvironment: vi.fn(),
5+
mockGetDb: vi.fn(() => 'mock-sql'),
6+
}));
7+
8+
vi.mock('@semianalysisai/inferencex-db/connection', () => ({
9+
getDb: mockGetDb,
10+
JSON_MODE: false,
11+
FIXTURES_MODE: false,
12+
}));
13+
14+
vi.mock('@semianalysisai/inferencex-db/queries/environments', () => ({
15+
getEnvironmentForRunConfig: mockGetEnvironment,
16+
}));
17+
18+
vi.mock('@/lib/api-cache', () => ({
19+
cachedQuery: (fn: (...args: any[]) => any) => fn,
20+
cachedJson: (data: unknown) => Response.json(data),
21+
}));
22+
23+
import { GET } from './route';
24+
import { NextRequest } from 'next/server';
25+
26+
function req(url: string): NextRequest {
27+
return new NextRequest(new URL(url, 'http://localhost'));
28+
}
29+
30+
beforeEach(() => {
31+
vi.clearAllMocks();
32+
});
33+
34+
const env = {
35+
source: 'env_json' as const,
36+
image: 'lmsysorg/sglang:latest',
37+
framework_version: '0.4.3.post2',
38+
framework_sha: 'e136d70cdc6101007017c05d57fb4cec5d6ed98f',
39+
torch_version: '2.5.1+cu124',
40+
python_version: '3.12.7',
41+
cuda_version: '12.4',
42+
rocm_version: null,
43+
driver_version: '560.35.03',
44+
gpu_sku: 'NVIDIA H100 80GB HBM3',
45+
extra: {},
46+
};
47+
48+
const VALID_QS = 'workflow_run_id=101&config_id=42';
49+
50+
describe('GET /api/v1/run-environment', () => {
51+
it('returns 400 when workflow_run_id is missing', async () => {
52+
const res = await GET(req('/api/v1/run-environment?config_id=42'));
53+
expect(res.status).toBe(400);
54+
});
55+
56+
it('returns 400 when config_id is missing', async () => {
57+
const res = await GET(req('/api/v1/run-environment?workflow_run_id=101'));
58+
expect(res.status).toBe(400);
59+
});
60+
61+
it('returns 400 when params are non-numeric', async () => {
62+
const res = await GET(req('/api/v1/run-environment?workflow_run_id=abc&config_id=xyz'));
63+
expect(res.status).toBe(400);
64+
});
65+
66+
it('returns 404 when no environment row exists', async () => {
67+
mockGetEnvironment.mockResolvedValueOnce(null);
68+
const res = await GET(req(`/api/v1/run-environment?${VALID_QS}`));
69+
expect(res.status).toBe(404);
70+
});
71+
72+
it('returns env_json environment for valid (workflow_run_id, config_id)', async () => {
73+
mockGetEnvironment.mockResolvedValueOnce(env);
74+
const res = await GET(req(`/api/v1/run-environment?${VALID_QS}`));
75+
expect(res.status).toBe(200);
76+
const body = await res.json();
77+
expect(body).toEqual({ workflow_run_id: 101, config_id: 42, environment: env });
78+
expect(mockGetEnvironment).toHaveBeenCalledWith('mock-sql', 101, 42);
79+
});
80+
81+
it('returns log_parse environment with nulls preserved', async () => {
82+
mockGetEnvironment.mockResolvedValueOnce({
83+
...env,
84+
source: 'log_parse',
85+
framework_sha: null,
86+
driver_version: null,
87+
cuda_version: null,
88+
gpu_sku: null,
89+
});
90+
const res = await GET(req(`/api/v1/run-environment?${VALID_QS}`));
91+
expect(res.status).toBe(200);
92+
const body = await res.json();
93+
expect(body.environment.source).toBe('log_parse');
94+
expect(body.environment.framework_sha).toBeNull();
95+
expect(body.environment.driver_version).toBeNull();
96+
});
97+
98+
it('returns 500 when query throws', async () => {
99+
mockGetEnvironment.mockRejectedValueOnce(new Error('Connection reset'));
100+
const res = await GET(req(`/api/v1/run-environment?${VALID_QS}`));
101+
expect(res.status).toBe(500);
102+
});
103+
});
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import { type NextRequest, NextResponse } from 'next/server';
2+
3+
import { JSON_MODE, getDb } from '@semianalysisai/inferencex-db/connection';
4+
import * as jsonProvider from '@semianalysisai/inferencex-db/json-provider';
5+
import { getEnvironmentForRunConfig } from '@semianalysisai/inferencex-db/queries/environments';
6+
7+
import { cachedJson, cachedQuery } from '@/lib/api-cache';
8+
9+
export const dynamic = 'force-dynamic';
10+
11+
const getCachedEnvironment = cachedQuery(
12+
(workflowRunId: number, configId: number) => {
13+
if (JSON_MODE) {
14+
return Promise.resolve(jsonProvider.getEnvironmentForRunConfig(workflowRunId, configId));
15+
}
16+
return getEnvironmentForRunConfig(getDb(), workflowRunId, configId);
17+
},
18+
'run-environment',
19+
{ blobOnly: true },
20+
);
21+
22+
/**
 * GET /api/v1/run-environment?workflow_run_id=<int>&config_id=<int>
 *
 * Looks up the recorded environment for one (workflow run, config) pair.
 *
 * Responses:
 * - 400 when either query parameter is missing or is not a positive integer.
 * - 404 when the lookup resolves to `null`.
 * - 200 with `{ workflow_run_id, config_id, environment }` otherwise.
 * - 500 when the lookup throws; the error is logged and not exposed.
 */
export async function GET(request: NextRequest) {
  const params = request.nextUrl.searchParams;
  const workflowRunId = Number(params.get('workflow_run_id'));
  const configId = Number(params.get('config_id'));

  // Number(null) and Number('') are 0 and Number('abc') is NaN, so missing and
  // non-numeric values fail this check. Requiring a safe integer > 0 also
  // rejects negative, fractional and infinite values, which the error message
  // has always promised but a plain truthiness/finite check let through.
  if (!Number.isSafeInteger(workflowRunId) || workflowRunId <= 0) {
    return NextResponse.json(
      { error: 'workflow_run_id is required (positive integer)' },
      { status: 400 },
    );
  }
  if (!Number.isSafeInteger(configId) || configId <= 0) {
    return NextResponse.json(
      { error: 'config_id is required (positive integer)' },
      { status: 400 },
    );
  }

  try {
    const env = await getCachedEnvironment(workflowRunId, configId);
    if (env === null) {
      return NextResponse.json({ error: 'Not found' }, { status: 404 });
    }
    return cachedJson({ workflow_run_id: workflowRunId, config_id: configId, environment: env });
  } catch (error) {
    // Log the underlying failure server-side; the client only sees a generic 500.
    console.error('Error fetching benchmark environment:', error);
    return NextResponse.json({ error: 'Internal server error' }, { status: 500 });
  }
}

packages/app/src/components/inference/types.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,14 @@ export interface AggDataEntry {
8888
actualDate?: string;
8989
/** URL to the GitHub Actions workflow run that produced this data point. */
9090
run_url?: string;
91+
/**
92+
* Natural-key halves for the Reproduce Drawer's Environment tab — together
93+
* they key `/api/v1/run-environment`. Both are optional because client-only
94+
* synthetic points (e.g. overlay rooflines, unofficial-run rows) don't
95+
* originate from a DB row.
96+
*/
97+
workflowRunId?: number;
98+
configId?: number;
9199
}
92100

93101
/**

packages/app/src/components/inference/ui/InferenceTable.tsx

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
'use client';
22

33
import { useMemo } from 'react';
4-
import { Wrench } from 'lucide-react';
54

65
import { useInference } from '@/components/inference/InferenceContext';
76
import type { ChartDefinition, InferenceData } from '@/components/inference/types';
@@ -114,34 +113,8 @@ export default function InferenceTable({
114113
sortValue: (row) => row.median_intvty ?? 0,
115114
className: 'tabular-nums',
116115
},
117-
{
118-
header: '',
119-
align: 'center',
120-
cell: (row) => (
121-
<button
122-
type="button"
123-
onClick={() => {
124-
track('inference_table_reproduce_clicked', {
125-
framework: row.framework,
126-
hwKey: row.hwKey,
127-
precision: row.precision,
128-
tp: row.tp,
129-
conc: row.conc,
130-
});
131-
openReproduceDrawer(row, 'inference_table');
132-
}}
133-
className="inline-flex items-center gap-1 rounded-md border border-border px-2 py-0.5 text-[11px] hover:bg-muted"
134-
data-testid="inference-table-reproduce-btn"
135-
aria-label="Reproduce this benchmark"
136-
>
137-
<Wrench className="size-3" aria-hidden="true" />
138-
Reproduce
139-
</button>
140-
),
141-
className: 'whitespace-nowrap',
142-
},
143116
],
144-
[yPath, yLabel, xLabel, openReproduceDrawer],
117+
[yPath, yLabel, xLabel],
145118
);
146119

147120
return (
@@ -150,6 +123,16 @@ export default function InferenceTable({
150123
columns={columns}
151124
testId="inference-results-table"
152125
analyticsPrefix="inference_table"
126+
onRowClick={(row) => {
127+
track('inference_table_reproduce_clicked', {
128+
framework: row.framework,
129+
hwKey: row.hwKey,
130+
precision: row.precision,
131+
tp: row.tp,
132+
conc: row.conc,
133+
});
134+
openReproduceDrawer(row, 'inference_table');
135+
}}
153136
/>
154137
);
155138
}

0 commit comments

Comments
 (0)