Skip to content

Commit 42fdf80

Browse files
committed
feat(inference): add "Measured J per Token" metric (input + output denominator)
Adds a third option to the gated "Measured Energy" dropdown group:

- Measured J per Token (J/total-token: system energy / (input + output))

This is distinct from the existing "Measured J per Output Token", which divides only by output tokens (treating the prompt as free). The new metric enables workload-shape-fair comparisons — especially for prompt-heavy workloads like 8k/1k, where J/output-token is ~9x higher than J/total-token despite the same energy.

Wires the new field through the same plumbing as the existing measured-power metrics:

- packages/constants/src/metric-keys.ts: register joules_per_total_token
- packages/app/src/lib/benchmark-transform.ts: pass through (left undefined for legacy rows)
- packages/app/src/components/inference/types.ts: extend AggDataEntry, InferenceData, YAxisMetricKey, ChartDefinition
- packages/app/src/lib/chart-utils.ts: extend Y_AXIS_METRICS, createChartDataPoint, roofline union, markRooflinePoints
- packages/app/src/components/inference/inference-chart-config.json: add y_measuredJPerTotalToken to both chartTypes (roofline lower_right / lower_left)
- packages/app/src/components/inference/ui/ChartControls.tsx: add to the Measured Energy gated group

Companion runner-side change: SemiAnalysisAI/InferenceX@363e49c4 emits joules_per_total_token in every agg_<run>.json.

Tests: +3 covering the new field (presence, parallel independence from J/output-token, graceful absence on legacy rows). 1944/1944 vitest tests pass.
1 parent 72156db commit 42fdf80

7 files changed

Lines changed: 61 additions & 6 deletions

File tree

packages/app/src/components/inference/inference-chart-config.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@
9595
"y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)",
9696
"y_measuredJPerOutputToken_title": "Measured Joules per Output Token",
9797
"y_measuredJPerOutputToken_roofline": "lower_right",
98+
"y_measuredJPerTotalToken": "measuredJPerTotalToken.y",
99+
"y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)",
100+
"y_measuredJPerTotalToken_title": "Measured Joules per Token (input + output)",
101+
"y_measuredJPerTotalToken_roofline": "lower_right",
98102
"y_cost_limit": 5,
99103
"y_latency_limit": 60
100104
},
@@ -193,6 +197,10 @@
193197
"y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)",
194198
"y_measuredJPerOutputToken_title": "Measured Joules per Output Token",
195199
"y_measuredJPerOutputToken_roofline": "lower_left",
200+
"y_measuredJPerTotalToken": "measuredJPerTotalToken.y",
201+
"y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)",
202+
"y_measuredJPerTotalToken_title": "Measured Joules per Token (input + output)",
203+
"y_measuredJPerTotalToken_roofline": "lower_left",
196204
"y_cost_limit": 5,
197205
"y_latency_limit": 60
198206
}

packages/app/src/components/inference/types.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,10 @@ export interface AggDataEntry {
6868
std_e2el: number;
6969
p99_e2el: number;
7070
// Measured GPU telemetry (emitted by runner's aggregate_power.py).
71-
// Optional because historical runs predate the field.
71+
// Optional because historical runs predate the fields.
7272
avg_power_w?: number;
7373
joules_per_output_token?: number;
74+
joules_per_total_token?: number;
7475
disagg: boolean;
7576
num_prefill_gpu: number;
7677
num_decode_gpu: number;
@@ -162,6 +163,7 @@ export interface InferenceData extends Partial<Omit<AggDataEntry, AggDataConflic
162163
// emit these fields.
163164
measuredAvgPower?: { y: number; roof: boolean };
164165
measuredJPerOutputToken?: { y: number; roof: boolean };
166+
measuredJPerTotalToken?: { y: number; roof: boolean };
165167
}
166168

167169
/**
@@ -189,7 +191,8 @@ export type YAxisMetricKey =
189191
| 'jOutput'
190192
| 'jInput'
191193
| 'measuredAvgPower'
192-
| 'measuredJPerOutputToken';
194+
| 'measuredJPerOutputToken'
195+
| 'measuredJPerTotalToken';
193196

194197
/**
195198
* Defines the configuration and labels for a specific chart.
@@ -302,6 +305,10 @@ export interface ChartDefinition {
302305
y_measuredJPerOutputToken_label?: string;
303306
y_measuredJPerOutputToken_title?: string;
304307
y_measuredJPerOutputToken_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
308+
y_measuredJPerTotalToken?: string;
309+
y_measuredJPerTotalToken_label?: string;
310+
y_measuredJPerTotalToken_title?: string;
311+
y_measuredJPerTotalToken_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
305312
y_cost_limit?: number;
306313
y_latency_limit?: number;
307314
}

packages/app/src/components/inference/ui/ChartControls.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ const METRIC_GROUPS: { label: string; metrics: string[]; gated?: boolean }[] = [
5656
{ label: 'All-in Provisioned Energy per Token', metrics: ['y_jTotal', 'y_jOutput', 'y_jInput'] },
5757
{
5858
label: 'Measured Energy',
59-
metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken'],
59+
metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken', 'y_measuredJPerTotalToken'],
6060
gated: true,
6161
},
6262
{ label: 'Custom User Values', metrics: ['y_costUser', 'y_powerUser'] },

packages/app/src/lib/benchmark-transform.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ export function rowToAggDataEntry(row: BenchmarkRow): AggDataEntry {
5454
// "no measurement" from "0 W" via createChartDataPoint's typeof guard.
5555
avg_power_w: m.avg_power_w,
5656
joules_per_output_token: m.joules_per_output_token,
57+
joules_per_total_token: m.joules_per_total_token,
5758
disagg: row.disagg,
5859
num_prefill_gpu: row.num_prefill_gpu,
5960
num_decode_gpu: row.num_decode_gpu,

packages/app/src/lib/chart-utils.test.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1265,6 +1265,33 @@ describe('createChartDataPoint measured power fields', () => {
12651265
expect(point.measuredAvgPower).toBeDefined();
12661266
expect(point.measuredAvgPower!.y).toBe(0);
12671267
});
1268+
1269+
it('emits measuredJPerTotalToken when joules_per_total_token is present', () => {
1270+
const e = entry({ joules_per_total_token: 0.93 });
1271+
const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100');
1272+
expect(point.measuredJPerTotalToken).toBeDefined();
1273+
expect(point.measuredJPerTotalToken!.y).toBe(0.93);
1274+
expect(point.measuredJPerTotalToken!.roof).toBe(false);
1275+
});
1276+
1277+
it('emits J/output and J/total independently — different denominators', () => {
1278+
// 8k1k workload: J/output ≈ 9 × J/total (input is ~8x output, so output/total ≈ 1/9).
1279+
const e = entry({ joules_per_output_token: 2.04, joules_per_total_token: 0.23 });
1280+
const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100');
1281+
expect(point.measuredJPerOutputToken!.y).toBe(2.04);
1282+
expect(point.measuredJPerTotalToken!.y).toBe(0.23);
1283+
});
1284+
1285+
it('omits measuredJPerTotalToken on rows that predate the field', () => {
1286+
// Rows ingested before joules_per_total_token was added still have avg_power_w
1287+
// and joules_per_output_token. The new field must be absent (not 0) so the
1288+
// chart correctly drops them from the J/total view rather than plotting fake data.
1289+
const e = entry({ avg_power_w: 458, joules_per_output_token: 2.04 });
1290+
const point = createChartDataPoint('2025-01-01', e, 'median_e2el', 'tput_per_gpu', 'h100');
1291+
expect(point.measuredAvgPower).toBeDefined();
1292+
expect(point.measuredJPerOutputToken).toBeDefined();
1293+
expect(point.measuredJPerTotalToken).toBeUndefined();
1294+
});
12681295
});
12691296

12701297
// ===========================================================================

packages/app/src/lib/chart-utils.ts

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ export const Y_AXIS_METRICS = [
152152
// distinct from the spec-sheet TDP-derived jTotal/jOutput/jInput above).
153153
'y_measuredAvgPower',
154154
'y_measuredJPerOutputToken',
155+
'y_measuredJPerTotalToken',
155156
] as const;
156157

157158
export type YAxisMetric = (typeof Y_AXIS_METRICS)[number];
@@ -403,6 +404,9 @@ export function createChartDataPoint(
403404
...(typeof entry.joules_per_output_token === 'number'
404405
? { measuredJPerOutputToken: { y: entry.joules_per_output_token, roof: false } }
405406
: {}),
407+
...(typeof entry.joules_per_total_token === 'number'
408+
? { measuredJPerTotalToken: { y: entry.joules_per_total_token, roof: false } }
409+
: {}),
406410
};
407411
}
408412

@@ -565,7 +569,8 @@ export const calculateRoofline = (
565569
| `jOutput.y`
566570
| `jInput.y`
567571
| `measuredAvgPower.y`
568-
| `measuredJPerOutputToken.y`,
572+
| `measuredJPerOutputToken.y`
573+
| `measuredJPerTotalToken.y`,
569574
rooflineDirection: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right',
570575
): InferenceData[] => {
571576
const pointsForRoofline = points.map((p) => {
@@ -637,7 +642,8 @@ export function computeAllRooflines(
637642
| `jOutput.y`
638643
| `jInput.y`
639644
| `measuredAvgPower.y`
640-
| `measuredJPerOutputToken.y`,
645+
| `measuredJPerOutputToken.y`
646+
| `measuredJPerTotalToken.y`,
641647
rooflineDirection,
642648
);
643649
}
@@ -683,6 +689,7 @@ export function markRooflinePoints(
683689
if (newPoint.jInput) newPoint.jInput.roof = false;
684690
if (newPoint.measuredAvgPower) newPoint.measuredAvgPower.roof = false;
685691
if (newPoint.measuredJPerOutputToken) newPoint.measuredJPerOutputToken.roof = false;
692+
if (newPoint.measuredJPerTotalToken) newPoint.measuredJPerTotalToken.roof = false;
686693

687694
for (const chartDefYKey of Y_AXIS_METRICS) {
688695
const rooflinePoints = computedRooflines[hwKey]?.[chartDefYKey];
@@ -749,6 +756,8 @@ export function markRooflinePoints(
749756
newPoint.measuredJPerOutputToken
750757
) {
751758
newPoint.measuredJPerOutputToken.roof = onCurrentRoofline;
759+
} else if (chartDefYKey === 'y_measuredJPerTotalToken' && newPoint.measuredJPerTotalToken) {
760+
newPoint.measuredJPerTotalToken.roof = onCurrentRoofline;
752761
}
753762
}
754763
finalProcessedData.push(newPoint);

packages/constants/src/metric-keys.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,11 @@ export const METRIC_KEYS = new Set([
4444
'p99.9_intvty',
4545
'std_intvty',
4646
// measured power / energy (emitted by runner's aggregate_power.py)
47-
// avg_power_w: mean per-GPU draw (W) during the load window
47+
// avg_power_w: mean per-GPU draw (W) during the load window
4848
// joules_per_output_token: avg_power_w * num_gpus * duration / total_output_tokens
49+
// joules_per_total_token: avg_power_w * num_gpus * duration / (total_input + total_output)
50+
// — workload-shape-fair view that doesn't treat prompt as free
4951
'avg_power_w',
5052
'joules_per_output_token',
53+
'joules_per_total_token',
5154
]);

0 commit comments

Comments
 (0)