add CSV export tests for all chart types (#48)

adibarra · web-flow · commit 00a742e86e8d · 2026-03-20T21:57:13.000-05:00
diff --git a/packages/app/src/lib/csv-export-helpers.test.ts b/packages/app/src/lib/csv-export-helpers.test.ts
@@ -121,6 +121,59 @@ describe('inferenceChartToCsv', () => {
     expect(rows).toHaveLength(0);
   });
 
+  it('only includes data matching selected precisions when pre-filtered (mirrors ChartDisplay export)', () => {
+    const fp4Point = makePoint({ hwKey: 'h100-sxm-sglang', precision: 'fp4' });
+    const fp8Point = makePoint({ hwKey: 'h100-sxm-sglang', precision: 'fp8' });
+    const allData = [fp4Point, fp8Point];
+
+    // Simulate ChartDisplay.tsx onExportCsv filter
+    const activeHwTypes = new Set(['h100-sxm-sglang']);
+    const selectedPrecisions = ['fp4'];
+    const visibleData = allData.filter(
+      (d) => activeHwTypes.has(d.hwKey as string) && selectedPrecisions.includes(d.precision),
+    );
+
+    const { headers, rows } = inferenceChartToCsv(visibleData, 'llama-3.1-405b', '1k/1k');
+    expect(rows).toHaveLength(1);
+    expect(rows[0][headers.indexOf('Precision')]).toBe('fp4');
+  });
+
+  it('filters by both GPU and precision (mirrors ChartDisplay export)', () => {
+    const data = [
+      makePoint({ hwKey: 'h100-sxm-sglang', precision: 'fp4' }),
+      makePoint({ hwKey: 'h100-sxm-sglang', precision: 'fp8' }),
+      makePoint({ hwKey: 'b200-sxm-sglang', precision: 'fp4' }),
+      makePoint({ hwKey: 'b200-sxm-sglang', precision: 'fp8' }),
+    ];
+
+    const activeHwTypes = new Set(['h100-sxm-sglang']);
+    const selectedPrecisions = ['fp4'];
+    const visibleData = data.filter(
+      (d) => activeHwTypes.has(d.hwKey as string) && selectedPrecisions.includes(d.precision),
+    );
+
+    const { headers, rows } = inferenceChartToCsv(visibleData, 'llama-3.1-405b', '1k/1k');
+    expect(rows).toHaveLength(1);
+    expect(rows[0][headers.indexOf('Hardware Key')]).toBe('h100-sxm-sglang');
+    expect(rows[0][headers.indexOf('Precision')]).toBe('fp4');
+  });
+
+  it('includes multiple precisions when all are selected', () => {
+    const data = [
+      makePoint({ hwKey: 'h100-sxm-sglang', precision: 'fp4' }),
+      makePoint({ hwKey: 'h100-sxm-sglang', precision: 'fp8' }),
+    ];
+
+    const activeHwTypes = new Set(['h100-sxm-sglang']);
+    const selectedPrecisions = ['fp4', 'fp8'];
+    const visibleData = data.filter(
+      (d) => activeHwTypes.has(d.hwKey as string) && selectedPrecisions.includes(d.precision),
+    );
+
+    const { rows } = inferenceChartToCsv(visibleData, 'llama-3.1-405b', '1k/1k');
+    expect(rows).toHaveLength(2);
+  });
+
   it('uses empty string for missing optional fields', () => {
     // Minimal point — most AggDataEntry fields are optional via Partial
     const data = [
@@ -153,7 +206,7 @@ describe('inferenceChartToCsv', () => {
   });
 });
 
-describe('reliabilityChartToCsv', () => {
+describe('reliabilityChartToCsv (mirrors ReliabilityChartDisplay export)', () => {
   it('exports reliability data with correct headers and values', () => {
     const data = [
       { model: 'h100-sxm', modelLabel: 'H100 SXM', successRate: 99.5, n_success: 199, total: 200 },
@@ -179,9 +232,22 @@ describe('reliabilityChartToCsv', () => {
     expect(headers).toHaveLength(5);
     expect(rows).toHaveLength(0);
   });
+
+  it('exports all visible GPUs from chartData (no extra filtering needed)', () => {
+    // ReliabilityChartDisplay passes chartData directly — no precision/GPU filter
+    const chartData = [
+      { model: 'h100-sxm', modelLabel: 'H100 SXM', successRate: 99.5, n_success: 199, total: 200 },
+      { model: 'a100-sxm', modelLabel: 'A100 SXM', successRate: 95.0, n_success: 95, total: 100 },
+    ];
+
+    const { rows } = reliabilityChartToCsv(chartData);
+    expect(rows).toHaveLength(2);
+    expect(rows[0][0]).toBe('H100 SXM');
+    expect(rows[1][0]).toBe('A100 SXM');
+  });
 });
 
-describe('evaluationChartToCsv', () => {
+describe('evaluationChartToCsv (mirrors EvaluationChartDisplay export)', () => {
   const makeEvalPoint = (
     overrides: Partial<{
       configLabel: string;
@@ -245,9 +311,49 @@ describe('evaluationChartToCsv', () => {
     expect(rows[0][headers.indexOf('Min Score')]).toBe('');
     expect(rows[0][headers.indexOf('Max Score')]).toBe('');
   });
+
+  it('exports all chartData entries directly (no extra filtering needed)', () => {
+    // EvaluationChartDisplay passes chartData directly from context
+    const data = [
+      makeEvalPoint({ hwKey: 'h100-sxm-vllm', precision: 'fp8' }),
+      makeEvalPoint({ hwKey: 'b200-sxm-sglang', precision: 'fp4' }),
+    ];
+
+    const { headers, rows } = evaluationChartToCsv(data);
+    expect(rows).toHaveLength(2);
+    expect(rows[0][headers.indexOf('Hardware Key')]).toBe('h100-sxm-vllm');
+    expect(rows[1][headers.indexOf('Hardware Key')]).toBe('b200-sxm-sglang');
+    expect(rows[0][headers.indexOf('Precision')]).toBe('fp8');
+    expect(rows[1][headers.indexOf('Precision')]).toBe('fp4');
+  });
+
+  it('includes Benchmark column reflecting the selected eval type (pre-filtered by context)', () => {
+    // EvaluationContext filters rawData by selectedBenchmark before building chartData,
+    // so all rows in the export share the same benchmark value
+    const data = [
+      makeEvalPoint({ benchmark: 'mmlu', hwKey: 'h100-sxm-vllm' }),
+      makeEvalPoint({ benchmark: 'mmlu', hwKey: 'b200-sxm-sglang' }),
+    ];
+
+    const { headers, rows } = evaluationChartToCsv(data);
+    expect(headers).toContain('Benchmark');
+    expect(rows[0][headers.indexOf('Benchmark')]).toBe('mmlu');
+    expect(rows[1][headers.indexOf('Benchmark')]).toBe('mmlu');
+  });
+
+  it('only contains data for one benchmark at a time (context filters by selectedBenchmark)', () => {
+    // Simulates that context already filtered to only 'humaneval' — no 'mmlu' rows leak through
+    const data = [
+      makeEvalPoint({ benchmark: 'humaneval', hwKey: 'h100-sxm-vllm' }),
+      makeEvalPoint({ benchmark: 'humaneval', hwKey: 'b200-sxm-sglang' }),
+    ];
+
+    const { headers, rows } = evaluationChartToCsv(data);
+    expect(rows.every((r) => r[headers.indexOf('Benchmark')] === 'humaneval')).toBe(true);
+  });
 });
 
-describe('calculatorChartToCsv', () => {
+describe('calculatorChartToCsv (mirrors ThroughputCalculatorDisplay export)', () => {
   it('exports calculator results with target interactivity', () => {
     const results = [
       {
@@ -318,9 +424,48 @@ describe('calculatorChartToCsv', () => {
     expect(headers).toHaveLength(14);
     expect(rows).toHaveLength(0);
   });
+
+  it('exports results with label resolver (mirrors ThroughputCalculatorDisplay export)', () => {
+    // ThroughputCalculatorDisplay uses a getLabel that resolves hwKey → display name
+    const results = [
+      {
+        resultKey: 'h100-sxm-sglang',
+        hwKey: 'h100-sxm-sglang',
+        precision: 'FP8',
+        value: 1200,
+        cost: 0.52,
+        concurrency: 16,
+      },
+      {
+        resultKey: 'b200-sxm-sglang',
+        hwKey: 'b200-sxm-sglang',
+        precision: 'FP4',
+        value: 2000,
+        cost: 0.35,
+        concurrency: 32,
+      },
+    ];
+
+    const labelMap: Record<string, string> = {
+      'h100-sxm-sglang': 'H100 SXM (SGLang)',
+      'b200-sxm-sglang': 'B200 SXM (SGLang)',
+    };
+
+    const { headers, rows } = calculatorChartToCsv(
+      results,
+      125,
+      (hwKey) => labelMap[hwKey] ?? hwKey,
+    );
+    expect(rows).toHaveLength(2);
+    expect(rows[0][headers.indexOf('GPU')]).toBe('H100 SXM (SGLang)');
+    expect(rows[0][headers.indexOf('Precision')]).toBe('FP8');
+    expect(rows[0][headers.indexOf('Cost per Million Total Tokens ($)')]).toBe(0.52);
+    expect(rows[1][headers.indexOf('GPU')]).toBe('B200 SXM (SGLang)');
+    expect(rows[1][headers.indexOf('Precision')]).toBe('FP4');
+  });
 });
 
-describe('historicalTrendToCsv', () => {
+describe('historicalTrendToCsv (mirrors HistoricalTrendsDisplay export)', () => {
   it('flattens trend lines into rows with GPU labels', () => {
     const trendLines = new Map([
       [
@@ -378,4 +523,33 @@ describe('historicalTrendToCsv', () => {
     expect(headers).toHaveLength(8);
     expect(rows).toHaveLength(0);
   });
+
+  it('exports with dynamic metric label and target interactivity (mirrors HistoricalTrendsDisplay export)', () => {
+    // HistoricalTrendsDisplay passes currentYLabel and targetInteractivity
+    const trendLines = new Map([
+      [
+        'h100-sxm-sglang',
+        [
+          { date: '2025-01-10', value: 800, x: 50 },
+          { date: '2025-01-15', value: 950, x: 50 },
+        ],
+      ],
+    ]);
+
+    const lineConfigs = [{ id: 'h100-sxm-sglang', label: 'H100 SXM (SGLang)', precision: 'fp8' }];
+
+    const { headers, rows } = historicalTrendToCsv(
+      trendLines,
+      lineConfigs,
+      'Cost per Million Tokens ($)',
+      50,
+    );
+
+    expect(headers).toContain('Cost per Million Tokens ($)');
+    expect(headers).toContain('Target Interactivity (tok/s/user)');
+    expect(rows).toHaveLength(2);
+    expect(rows[0][headers.indexOf('Cost per Million Tokens ($)')]).toBe(800);
+    expect(rows[0][headers.indexOf('Target Interactivity (tok/s/user)')]).toBe(50);
+    expect(rows[1][headers.indexOf('Date')]).toBe('2025-01-15');
+  });
 });