SemiAnalysisAI
diff --git a/‎.claude/skills/write-inferencex-blog/SKILL.md‎
Lines changed: 1 addition & 1 deletion b/‎.claude/skills/write-inferencex-blog/SKILL.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 0 deletions b/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎packages/app/content/blog/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4.mdx‎
Lines changed: 181 additions & 0 deletions b/‎packages/app/content/blog/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4.mdx‎
Lines changed: 181 additions & 0 deletions
diff --git a/‎packages/app/public/images/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4/benchmark-dark.png‎
252 KB b/‎packages/app/public/images/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4/benchmark-dark.png‎
252 KB
diff --git a/‎packages/app/public/images/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4/benchmark-light.png‎
252 KB b/‎packages/app/public/images/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4/benchmark-light.png‎
252 KB
diff --git a/‎packages/app/public/images/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4/specs-radar-dark.png‎
458 KB b/‎packages/app/public/images/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4/specs-radar-dark.png‎
458 KB
diff --git a/‎packages/app/public/images/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4/specs-radar-light.png‎
458 KB b/‎packages/app/public/images/gb300-nvl72-vs-gb200-nvl72-dsv4-pro-vllm-fp4/specs-radar-light.png‎
458 KB
diff --git a/‎packages/app/src/components/inference/inference-chart-config.json‎
Lines changed: 8 additions & 0 deletions b/‎packages/app/src/components/inference/inference-chart-config.json‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎packages/app/src/components/inference/types.ts‎
Lines changed: 9 additions & 2 deletions b/‎packages/app/src/components/inference/types.ts‎
Lines changed: 9 additions & 2 deletions
diff --git a/‎packages/app/src/components/inference/ui/ChartControls.tsx‎
Lines changed: 1 addition & 1 deletion b/‎packages/app/src/components/inference/ui/ChartControls.tsx‎
Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ Common gotchas:
 - **Workload mismatch**: chart headers can mislead. Verify ISL/OSL from the data itself — 1k/1k and 8k/1k give wildly different `tok/s/GPU` and `$/M tokens` numbers. The blog title, lede, tables, and chart caption must all use the same ISL/OSL.
 - **Latest run only**: filter to the highest `run_attempt` per `github_run_id`, then take the latest `date` per `(config_id, conc, isl, osl)`. See the `inferencex-data` skill for the exact filter.
 - **Model spec verification**: never invent parameter counts. Always `WebSearch` the model's released specs (total params, active params, expert count, attention type) before writing the architecture paragraph. Cite sources. GLM-5 is _not_ GLM-4.5 — the numbers changed.
-- **TCO values**: pull from the [SemiAnalysis AI Cloud TCO Model](https://newsletter.semianalysis.com/p/ai-cloud-economics). Current values (verify if older than a quarter):
+- **TCO values**: pull from the [SemiAnalysis AI Cloud TCO Model](https://semianalysis.com/ai-cloud-tco-model/). Current values in $/GPU/hr (re-verify against the model if they are more than a quarter old):
   - H100 $1.30, H200 $1.41, B200 $1.95, B300 $2.34, GB200 $2.21, GB300 $2.652
   - MI300X $1.12, MI325X $1.28, MI355X $1.48
 - **Cost per million tokens formula**: `$/M tok = TCO_$/GPU/hr * 1e6 / (3600 * tput_per_gpu)`, where `tput_per_gpu` is the per-GPU throughput in `tok/s/GPU`. Equivalently in Python: `cost = tco / (3600 * tput / 1e6)`. Throughput is per-GPU, so GPU count cancels out for aggregated configs.
 
@@ -0,0 +1 @@
+* @adibarra
@@ -95,6 +95,10 @@
     "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)",
     "y_measuredJPerOutputToken_title": "Measured Joules per Output Token",
     "y_measuredJPerOutputToken_roofline": "lower_right",
+    "y_measuredJPerTotalToken": "measuredJPerTotalToken.y",
+    "y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)",
+    "y_measuredJPerTotalToken_title": "Measured Joules per Token (incl. prompt)",
+    "y_measuredJPerTotalToken_roofline": "lower_right",
     "y_cost_limit": 5,
     "y_latency_limit": 60
   },
@@ -193,6 +197,10 @@
     "y_measuredJPerOutputToken_label": "Measured J per Output Token (J/tok)",
     "y_measuredJPerOutputToken_title": "Measured Joules per Output Token",
     "y_measuredJPerOutputToken_roofline": "lower_left",
+    "y_measuredJPerTotalToken": "measuredJPerTotalToken.y",
+    "y_measuredJPerTotalToken_label": "Measured J per Token (J/tok)",
+    "y_measuredJPerTotalToken_title": "Measured Joules per Token (incl. prompt)",
+    "y_measuredJPerTotalToken_roofline": "lower_left",
     "y_cost_limit": 5,
     "y_latency_limit": 60
   }
 
@@ -68,9 +68,10 @@ export interface AggDataEntry {
   std_e2el: number;
   p99_e2el: number;
   // Measured GPU telemetry (emitted by runner's aggregate_power.py).
-  // Optional because historical runs predate the field.
+  // Optional because historical runs predate the fields.
   avg_power_w?: number;
   joules_per_output_token?: number;
+  joules_per_total_token?: number;
   disagg: boolean;
   num_prefill_gpu: number;
   num_decode_gpu: number;
@@ -162,6 +163,7 @@ export interface InferenceData extends Partial<Omit<AggDataEntry, AggDataConflic
   // emit these fields.
   measuredAvgPower?: { y: number; roof: boolean };
   measuredJPerOutputToken?: { y: number; roof: boolean };
+  measuredJPerTotalToken?: { y: number; roof: boolean };
 }
 
 /**
@@ -189,7 +191,8 @@ export type YAxisMetricKey =
   | 'jOutput'
   | 'jInput'
   | 'measuredAvgPower'
-  | 'measuredJPerOutputToken';
+  | 'measuredJPerOutputToken'
+  | 'measuredJPerTotalToken';
 
 /**
  * Defines the configuration and labels for a specific chart.
@@ -302,6 +305,10 @@ export interface ChartDefinition {
   y_measuredJPerOutputToken_label?: string;
   y_measuredJPerOutputToken_title?: string;
   y_measuredJPerOutputToken_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
+  y_measuredJPerTotalToken?: string;
+  y_measuredJPerTotalToken_label?: string;
+  y_measuredJPerTotalToken_title?: string;
+  y_measuredJPerTotalToken_roofline?: 'upper_right' | 'upper_left' | 'lower_left' | 'lower_right';
   y_cost_limit?: number;
   y_latency_limit?: number;
 }
 
@@ -56,7 +56,7 @@ const METRIC_GROUPS: { label: string; metrics: string[]; gated?: boolean }[] = [
   { label: 'All-in Provisioned Energy per Token', metrics: ['y_jTotal', 'y_jOutput', 'y_jInput'] },
   {
     label: 'Measured Energy',
-    metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken'],
+    metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken', 'y_measuredJPerTotalToken'],
     gated: true,
   },
   { label: 'Custom User Values', metrics: ['y_costUser', 'y_powerUser'] },
Original file line number	Diff line number	Diff line change
`@@ -56,7 +56,7 @@ const METRIC_GROUPS: { label: string; metrics: string[]; gated?: boolean }[] = [`
`56`	`56`	`{ label: 'All-in Provisioned Energy per Token', metrics: ['y_jTotal', 'y_jOutput', 'y_jInput'] },`
`57`	`57`	`{`
`58`	`58`	`label: 'Measured Energy',`
`59`		`- metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken'],`
	`59`	`+ metrics: ['y_measuredAvgPower', 'y_measuredJPerOutputToken', 'y_measuredJPerTotalToken'],`
`60`	`60`	`gated: true,`
`61`	`61`	`},`
`62`	`62`	`{ label: 'Custom User Values', metrics: ['y_costUser', 'y_powerUser'] },`