Make flame graphs in benchmark comparison viewer only show measured samples of that subtest

mstange · mstange · commit b345b45d2778 · 2026-05-13T18:49:43.000-04:00
diff --git a/src/components/app/BenchmarkCompareViewer.tsx b/src/components/app/BenchmarkCompareViewer.tsx
@@ -27,13 +27,11 @@ import type {
 } from 'firefox-profiler/profile-logic/benchmark/perf-compare-stats';
 import type { Profile } from 'firefox-profiler/types';
 import { BucketFlameGraphPair } from './BucketFlameGraphPair';
-import type { BucketProfileBundle } from './BucketFlameGraphPair';
 import {
-  buildDerivedThread,
-  getCategoriesForProfile,
-  getDefaultCategoryIndex,
+  makeBucketProfileBundle,
+  makeSuiteFilteredThread,
 } from 'firefox-profiler/profile-logic/benchmark/bucket-flame-graph-data';
-import { getBenchmarkInfo } from 'firefox-profiler/profile-logic/benchmark/benchmark-stuff';
+import type { BucketProfileBundle } from 'firefox-profiler/profile-logic/benchmark/bucket-flame-graph-data';
 import './BenchmarkCompareViewer.css';
 
 type ComparisonData = {
@@ -286,17 +284,10 @@ function ScoreTable({
       </thead>
       <tbody>
         <tr className="benchmarkRow--overall">
-          <td
-            className="benchmarkCell--scoreLabel"
-            title={overallScore.label}
-          >
+          <td className="benchmarkCell--scoreLabel" title={overallScore.label}>
             {overallScore.label}
           </td>
-          <ScoreRow
-            row={overallScore}
-            isOverall={true}
-            numSuites={numSuites}
-          />
+          <ScoreRow row={overallScore} isOverall={true} numSuites={numSuites} />
         </tr>
         {suiteScores.map((row) => {
           const isExpanded = expanded.has(row.label);
@@ -315,10 +306,7 @@ function ScoreTable({
                   title={row.label}
                 >
                   {expandable && (
-                    <span
-                      className="benchmarkDisclosure"
-                      aria-hidden="true"
-                    >
+                    <span className="benchmarkDisclosure" aria-hidden="true">
                       {isExpanded ? '▼' : '▶'}
                     </span>
                   )}
@@ -369,6 +357,18 @@ function BucketTable({
     baseSubtestMean !== undefined && numSuites !== undefined;
   const columnCount = showSubtestColumns ? 6 : 5;
 
+  // Build per-suite bundles whose `thread.samples.weight` is zeroed outside
+  // this suite's iteration markers, so flame graphs reflect only the samples
+  // that contribute to this suite's score.
+  const baseSuiteBundle = useMemo(
+    () => withSuiteFilteredThread(baseBundle, label),
+    [baseBundle, label]
+  );
+  const newSuiteBundle = useMemo(
+    () => withSuiteFilteredThread(newBundle, label),
+    [newBundle, label]
+  );
+
   const [expanded, setExpanded] = useState<Set<string>>(new Set());
   const toggle = (bucketName: string) => {
     setExpanded((prev) => {
@@ -380,7 +380,7 @@ function BucketTable({
   };
 
   const significant = comparisons
-    .filter((c) => c.confidence !== 'LOW' && c.effectSize !== 'Negligible')
+    // .filter((c) => c.confidence !== 'LOW' && c.effectSize !== 'Negligible')
     .sort(
       (a, b) =>
         Math.abs(b.newMean - b.baseMean) - Math.abs(a.newMean - a.baseMean)
@@ -465,16 +465,18 @@ function BucketTable({
                 <td className="benchmarkCell--number">
                   {c.baseMean.toFixed(2)}
                 </td>
-                <td className="benchmarkCell--number">{c.newMean.toFixed(2)}</td>
+                <td className="benchmarkCell--number">
+                  {c.newMean.toFixed(2)}
+                </td>
                 <td className="benchmarkCell--number">{absDiffStr}</td>
                 {pctCells}
               </tr>
               {expandable && isExpanded && (
                 <tr className="benchmarkRow--bucket-expansion">
                   <td colSpan={columnCount}>
                     <BucketFlameGraphPair
-                      baseBundle={baseBundle}
-                      newBundle={newBundle}
+                      baseBundle={baseSuiteBundle}
+                      newBundle={newSuiteBundle}
                       baseFunc={c.baseFunc}
                       newFunc={c.newFunc}
                     />
@@ -489,20 +491,14 @@ function BucketTable({
   );
 }
 
-/** Build the (profile, derivedThread, categories) bundle once per profile.
- * Computing the derived thread is expensive, so we memoize on profile identity
- * and reuse the same bundle across every bucket the user expands. */
-function makeBucketProfileBundle(profile: Profile): BucketProfileBundle {
-  const categories = getCategoriesForProfile(profile);
-  const defaultCategory = getDefaultCategoryIndex(categories);
-  const benchmarkInfo = getBenchmarkInfo(profile, 'speedometer');
-  const thread = buildDerivedThread(
-    profile,
-    benchmarkInfo.threadIndex,
-    categories,
-    defaultCategory
-  );
-  return { profile, thread, categories, defaultCategory };
+/** Shallow-copy `bundle`, swapping in a `thread` whose sample weights are
+ * zeroed outside the iteration markers of the suite named `suiteName`. */
+function withSuiteFilteredThread(
+  bundle: BucketProfileBundle,
+  suiteName: string
+): BucketProfileBundle {
+  const thread = makeSuiteFilteredThread(bundle, suiteName);
+  return { ...bundle, thread };
 }
 
 function ComparisonResults({ data }: { data: ComparisonData }) {
@@ -514,11 +510,11 @@ function ComparisonResults({ data }: { data: ComparisonData }) {
   );
 
   const baseBundle = useMemo(
-    () => makeBucketProfileBundle(data.baseProfile),
+    () => makeBucketProfileBundle(data.baseProfile, 'speedometer'),
     [data.baseProfile]
   );
   const newBundle = useMemo(
-    () => makeBucketProfileBundle(data.newProfile),
+    () => makeBucketProfileBundle(data.newProfile, 'speedometer'),
     [data.newProfile]
   );
 
diff --git a/src/components/app/BucketFlameGraphPair.tsx b/src/components/app/BucketFlameGraphPair.tsx
@@ -7,25 +7,16 @@ import { useMemo, useState } from 'react';
 import { FlameGraph } from 'firefox-profiler/components/flame-graph/FlameGraph';
 import { computeBucketFlameGraphData } from 'firefox-profiler/profile-logic/benchmark/bucket-flame-graph-data';
 
-import type { BucketFlameGraphData } from 'firefox-profiler/profile-logic/benchmark/bucket-flame-graph-data';
 import type {
-  Profile,
-  Thread,
-  CategoryList,
-  IndexIntoCategoryList,
+  BucketFlameGraphData,
+  BucketProfileBundle,
+} from 'firefox-profiler/profile-logic/benchmark/bucket-flame-graph-data';
+import type {
   IndexIntoFuncTable,
   IndexIntoCallNodeTable,
 } from 'firefox-profiler/types';
 
-/** Per-profile prep data passed in from the viewer. The derived `thread` is
- * expensive to build, so it's computed once at the viewer level and reused
- * across every bucket the user expands. */
-export type BucketProfileBundle = {
-  profile: Profile;
-  thread: Thread;
-  categories: CategoryList;
-  defaultCategory: IndexIntoCategoryList;
-};
+export type { BucketProfileBundle };
 
 type SideProps = {
   label: string;
diff --git a/src/profile-logic/benchmark/benchmark-stuff.ts b/src/profile-logic/benchmark/benchmark-stuff.ts
@@ -15,7 +15,7 @@ import { ensureExists } from 'firefox-profiler/utils/types';
 
 export type BenchmarkHarness = 'speedometer' | 'jetstream';
 
-type BenchmarkInfo = {
+export type BenchmarkInfo = {
   suiteNameIfSingleSuite: string | null;
   threadIndex: number;
   getMeasuredTimeRanges: (
@@ -307,6 +307,33 @@ export type IterationMarkersAndMeasuredSamples = {
   measuredSamples: SamplesTableForThisStuff;
 };
 
+/**
+ * Derive one suite's sample weights from the profile-wide measured weights.
+ * A copy of `measuredSampleWeights` is passed to `zeroWeightsOutsideRanges`
+ * together with this suite's iteration marker ranges, so the returned weights
+ * are zeroed outside those ranges and the input array is left untouched.
+ * Markers whose `end` is null do not describe a range and are skipped.
+ *
+ * Iteration markers are assumed to be sorted by start time and
+ * non-overlapping (matching the assumption in `computeSuiteScores`).
+ */
+export function computeSuiteFilteredSampleWeights(
+  measuredSampleWeights: Float64Array,
+  sampleTimes: Float64Array,
+  iterationMarkers: Marker[]
+): Float64Array {
+  const suiteRanges: StartEndRange[] = [];
+  for (const { start, end } of iterationMarkers) {
+    if (end === null) {
+      continue;
+    }
+    suiteRanges.push({ start, end });
+  }
+  const suiteWeights = measuredSampleWeights.slice();
+  zeroWeightsOutsideRanges(suiteWeights, sampleTimes, suiteRanges);
+  return suiteWeights;
+}
+
 export function computeIterationMarkersAndMeasuredSamples(
   benchmarkInfo: BenchmarkInfo,
   filteredMarkers: Marker[],
@@ -356,7 +383,7 @@ function computeGeomean(values: number[]): number {
   return Math.pow(product, 1 / values.length);
 }
 
-function zeroWeightsOutsideRanges(
+export function zeroWeightsOutsideRanges(
   sampleWeights: Float64Array,
   sampleTimes: Float64Array,
   nonZeroRanges: StartEndRange[]
diff --git a/src/profile-logic/benchmark/bucket-flame-graph-data.ts b/src/profile-logic/benchmark/bucket-flame-graph-data.ts
@@ -21,6 +21,7 @@ import {
   createThreadFromDerivedTables,
   getCallNodeInfo,
   getSampleIndexToCallNodeIndex,
+  getTimeRangeForThread,
 } from '../profile-data';
 import * as Transforms from '../transforms';
 import * as CallTree from '../call-tree';
@@ -29,8 +30,19 @@ import { computeReferenceCPUDeltaPerMs } from '../cpu';
 import { getDefaultCategories } from '../data-structures';
 import { StringTable } from '../../utils/string-table';
 import { base64StringToBytes } from '../../utils/base64';
+import {
+  correlateIPCMarkers,
+  deriveMarkersFromRawMarkerTable,
+} from '../marker-data';
+import {
+  computeSuiteFilteredSampleWeights,
+  getBenchmarkInfo,
+  zeroWeightsOutsideRanges,
+} from './benchmark-stuff';
+import type { BenchmarkHarness, BenchmarkInfo } from './benchmark-stuff';
 
 import type {
+  Marker,
   Thread,
   Profile,
   IndexIntoFuncTable,
@@ -70,7 +82,9 @@ export function getCategoriesForProfile(profile: Profile): CategoryList {
 }
 
 /** Default category index — the "Other" / grey category. */
-export function getDefaultCategoryIndex(categories: CategoryList): IndexIntoCategoryList {
+export function getDefaultCategoryIndex(
+  categories: CategoryList
+): IndexIntoCategoryList {
   return categories.findIndex((c) => c.color === 'grey');
 }
 
@@ -151,7 +165,10 @@ export function computeBucketFlameGraphData(
   );
 
   // 3. CTSS samples (timing strategy → just thread.samples).
-  const ctssSamples = CallTree.extractSamplesLikeTable(selfWingThread, 'timing');
+  const ctssSamples = CallTree.extractSamplesLikeTable(
+    selfWingThread,
+    'timing'
+  );
 
   // 4. Map samples → call nodes.
   const sampleIndexToCallNodeIndex = getSampleIndexToCallNodeIndex(
@@ -233,3 +250,109 @@ export function computeBucketFlameGraphData(
     rootTotalSummary: callNodeSelfAndSummary.rootTotalSummary,
   };
 }
+
+/** Per-profile prep data, built by `makeBucketProfileBundle` and passed in
+ * from the viewer. The derived `thread` is expensive to build, so the bundle
+ * is reused across every bucket the user expands. Also carries the benchmark
+ * marker info needed to lazily build per-suite filtered threads. */
+export type BucketProfileBundle = {
+  profile: Profile;
+  thread: Thread;
+  categories: CategoryList;
+  defaultCategory: IndexIntoCategoryList;
+  benchmarkInfo: BenchmarkInfo;
+  /** `thread.samples.time` as a Float64Array, for fast range filtering. */
+  sampleTimes: Float64Array;
+  /** One weight per `sampleTimes` entry, with the global -async/-sync
+   * measured-time filter applied, like the score's `measuredSamples.weight`. */
+  measuredSampleWeights: Float64Array;
+  /** Iteration markers per suite name; assumed sorted and non-overlapping. */
+  markersPerSuite: Map<string, Marker[]>;
+};
+
+/**
+ * Prepare everything the bucket flame graphs need from one profile: the
+ * derived thread of the harness's benchmark thread, plus the marker-derived
+ * data (measured sample weights, iteration markers per suite) from which
+ * per-suite filtered threads are built later.
+ */
+export function makeBucketProfileBundle(
+  profile: Profile,
+  benchmarkHarness: BenchmarkHarness
+): BucketProfileBundle {
+  const categories = getCategoriesForProfile(profile);
+  const defaultCategory = getDefaultCategoryIndex(categories);
+  const benchmarkInfo = getBenchmarkInfo(profile, benchmarkHarness);
+  const { threadIndex } = benchmarkInfo;
+  const thread = buildDerivedThread(
+    profile,
+    threadIndex,
+    categories,
+    defaultCategory
+  );
+
+  // Markers are derived from the raw benchmark thread, not the derived one.
+  const rawThread = profile.threads[threadIndex];
+  const { stringArray } = profile.shared;
+  const stringTable = StringTable.withBackingArray(stringArray);
+  const { markers } = deriveMarkersFromRawMarkerTable(
+    rawThread.markers,
+    stringArray,
+    rawThread.tid,
+    getTimeRangeForThread(rawThread, profile.meta.interval),
+    correlateIPCMarkers(profile.threads, profile.shared)
+  );
+
+  const { samples } = thread;
+  const sampleTimes = new Float64Array(samples.time);
+  // A thread without explicit weights gets a weight of 1 for every sample.
+  const measuredSampleWeights = samples.weight
+    ? new Float64Array(samples.weight)
+    : new Float64Array(samples.length).fill(1);
+
+  // When `getMeasuredTimeRanges` returns null, the weights are left
+  // unfiltered.
+  const timeRanges = benchmarkInfo.getMeasuredTimeRanges(markers, stringTable);
+  if (timeRanges !== null) {
+    zeroWeightsOutsideRanges(measuredSampleWeights, sampleTimes, timeRanges);
+  }
+
+  return {
+    profile,
+    thread,
+    categories,
+    defaultCategory,
+    benchmarkInfo,
+    sampleTimes,
+    measuredSampleWeights,
+    markersPerSuite: benchmarkInfo.getMarkersPerSuite(markers, stringTable),
+  };
+}
+
+/**
+ * Build the Thread behind one suite's flame graphs. All tables are shared
+ * with `bundle.thread`; only `samples` is replaced, by a shallow copy whose
+ * weights come from `computeSuiteFilteredSampleWeights` for the iteration
+ * markers stored under `suiteName`. A suite name without an entry in
+ * `markersPerSuite` is handled as having no iteration markers.
+ * Neither `bundle` nor `bundle.thread` is modified.
+ */
+export function makeSuiteFilteredThread(
+  bundle: BucketProfileBundle,
+  suiteName: string
+): Thread {
+  const { samples } = bundle.thread;
+  const suiteWeights = computeSuiteFilteredSampleWeights(
+    bundle.measuredSampleWeights,
+    bundle.sampleTimes,
+    bundle.markersPerSuite.get(suiteName) ?? []
+  );
+  const suiteSamples: Thread['samples'] = {
+    ...samples,
+    // Convert the Float64Array into a plain number array.
+    weight: Array.from(suiteWeights),
+    // Keep an existing weight type; fall back to 'samples' when unset.
+    weightType: samples.weightType ?? 'samples',
+  };
+  return { ...bundle.thread, samples: suiteSamples };
+}