Skip to content

Commit 83fd8c5

Browse files
committed
Avoid float-to-integer conversions by using fixed precision computations.
When computing the activity graph fills, store the fractional sample positions as integers after multiplying by (1 << FIXED_POINT_BITS). This lets us use shifts and masking when we accumulate the contributions into the buffers in the hot loops.
1 parent 371f63f commit 83fd8c5

2 files changed

Lines changed: 474 additions & 438 deletions

File tree

src/components/shared/thread/ActivityGraphFills.tsx

Lines changed: 79 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@ type RenderedComponentSettings = {
5050
export type PrecomputedPositions = {
5151
// The fractional device pixel position per sample in the range-filtered thread.
5252
// Each position is clamped such that 0 <= pos < canvasPixelWidth.
53-
samplePositions: Float32Array; // DevicePixel[]
53+
samplePositions: Int32Array; // (DevicePixel * (1 << FIXED_POINT_BITS))[]
5454
// The fractional device pixel position of the half-way point *before* the sample,
5555
// per sample in the range-filtered thread. Has one extra element at the end for
5656
// the half-way position after the last sample.
5757
// Each position is clamped such that 0 <= pos < canvasPixelWidth.
58-
halfwayPositions: Float32Array; // DevicePixel[]
58+
halfwayPositions: Int32Array; // (DevicePixel * (1 << FIXED_POINT_BITS))[]
5959
};
6060

6161
type SampleContributionToPixel = {
@@ -73,7 +73,7 @@ type CategoryFill = {
7373
readonly fillStyle: string | CanvasPattern;
7474
// Mutated in place during the computation step.
7575
// Contains fixed-point values between 0 and (100 << FIXED_POINT_BITS).
76-
readonly perPixelContribution: Float32Array<ArrayBuffer>;
76+
readonly perPixelContribution: Int32Array<ArrayBuffer>;
7777
// Mutated in place during the computation step.
7878
// Contains values between 0 and 1.
7979
readonly accumulatedUpperEdge: Float32Array<ArrayBuffer>;
@@ -93,7 +93,7 @@ export type CategoryDrawStyles = ReadonlyArray<{
9393

9494
// These Int32Arrays are mutated in place during the computation step.
9595
// buffers[selectedState] is the buffer for the given SelectedState enum value.
96-
type SelectedPercentageAtPixelBuffers = Float32Array<ArrayBuffer>[];
96+
type SelectedPercentageAtPixelBuffers = Int32Array<ArrayBuffer>[];
9797

9898
export type CpuRatioInTimeRange = {
9999
readonly cpuRatio: number;
@@ -109,6 +109,9 @@ const SMOOTHING_KERNEL: Float32Array<ArrayBuffer> = _getSmoothingKernel(
109109
BOX_BLUR_RADII
110110
);
111111

112+
// Number of fractional bits in the fixed-point representation of positions:
// a fixed-point position is the device pixel position multiplied by
// (1 << FIXED_POINT_BITS), stored as an integer.
const FIXED_POINT_BITS = 16;
113+
// Bit mask selecting the fractional part (the low FIXED_POINT_BITS bits) of a
// fixed-point position. The integer pixel index is `pos >> FIXED_POINT_BITS`.
const FIXED_POINT_MASK = (1 << FIXED_POINT_BITS) - 1;
114+
112115
export function precomputePositions(
113116
fullThreadSampleTimes: Milliseconds[],
114117
sampleIndexOffset: number,
@@ -118,24 +121,29 @@ export function precomputePositions(
118121
interval: Milliseconds,
119122
canvasPixelWidth: DevicePixels
120123
): PrecomputedPositions {
121-
function convertTimeToClampedPosition(time: Milliseconds): DevicePixels {
122-
const pos = (time - rangeStart) * xPixelsPerMs;
123-
if (pos < 0) {
124+
const canvasPixelsFp = canvasPixelWidth << FIXED_POINT_BITS;
125+
const xPixelsFpPerMs = xPixelsPerMs * (1 << FIXED_POINT_BITS);
126+
127+
/**
 * Convert a time to a horizontal canvas position in fixed-point
 * representation, i.e. device pixels multiplied by (1 << FIXED_POINT_BITS),
 * clamped such that 0 <= result < canvasPixelsFp.
 *
 * The clamping is performed on the floating point value *before* it is
 * truncated to an int32. `| 0` wraps modulo 2^32, so truncating first would
 * turn positions far outside of the canvas into arbitrary values: a position
 * far past the right edge could wrap to a negative number and be clamped to
 * 0, and a position far before the left edge could wrap to a positive number
 * and end up at the right edge or somewhere in the middle of the canvas.
 *
 * NOTE: the return annotation is kept as DevicePixels, but the returned value
 * is in fixed-point units (DevicePixels * (1 << FIXED_POINT_BITS)).
 *
 * @param time - The time to convert, on the same time base as rangeStart.
 * @returns The clamped fixed-point position, as an int32.
 */
function convertTimeToClampedPositionFixedPointPrecision(
  time: Milliseconds
): DevicePixels {
  const posFp = (time - rangeStart) * xPixelsFpPerMs;
  if (posFp < 0) {
    return 0;
  }
  if (posFp >= canvasPixelsFp) {
    return canvasPixelsFp - 1;
  }
  // Here posFp is below canvasPixelsFp, so truncating cannot wrap around.
  // A NaN position falls through both comparisons and is mapped to 0 by `| 0`.
  return posFp | 0;
}
131139

132140
// The fractional device pixel position per sample in the range-filtered thread.
133-
const samplePositions = new Float32Array(sampleCount); // DevicePixel[]
141+
const samplePositions = new Int32Array(sampleCount); // DevicePixel[]
134142

135143
// The fractional device pixel position of the half-way point *before* the sample,
136144
// per sample in the range-filtered thread. Has one extra element at the end for
137145
// the half-way position after the last sample.
138-
const halfwayPositions = new Float32Array(sampleCount + 1); // DevicePixel[]
146+
const halfwayPositions = new Int32Array(sampleCount + 1); // DevicePixel[]
139147

140148
let previousSampleTime =
141149
sampleIndexOffset > 0
@@ -144,10 +152,13 @@ export function precomputePositions(
144152
// Go through the samples and accumulate the category into the percentageBuffers.
145153
for (let i = 0; i < sampleCount; i++) {
146154
const sampleTime = fullThreadSampleTimes[sampleIndexOffset + i];
147-
samplePositions[i] = convertTimeToClampedPosition(sampleTime);
155+
samplePositions[i] =
156+
convertTimeToClampedPositionFixedPointPrecision(sampleTime);
148157

149158
const halfwayPointTimeBefore = (previousSampleTime + sampleTime) / 2;
150-
halfwayPositions[i] = convertTimeToClampedPosition(halfwayPointTimeBefore);
159+
halfwayPositions[i] = convertTimeToClampedPositionFixedPointPrecision(
160+
halfwayPointTimeBefore
161+
);
151162

152163
previousSampleTime = sampleTime;
153164
}
@@ -159,7 +170,7 @@ export function precomputePositions(
159170
: previousSampleTime + interval;
160171
const halfwayPointTime = (previousSampleTime + afterLastSampleTime) / 2;
161172
halfwayPositions[sampleCount] =
162-
convertTimeToClampedPosition(halfwayPointTime);
173+
convertTimeToClampedPositionFixedPointPrecision(halfwayPointTime);
163174

164175
return {
165176
samplePositions,
@@ -256,7 +267,8 @@ export class ActivityGraphFillComputer {
256267
// Only copy the first array, as there is no accumulation.
257268
const { accumulatedUpperEdge, perPixelContribution } = mutableFills[0];
258269
for (let i = 0; i < perPixelContribution.length; i++) {
259-
accumulatedUpperEdge[i] = perPixelContribution[i] / 100;
270+
accumulatedUpperEdge[i] =
271+
perPixelContribution[i] / (100 << FIXED_POINT_BITS);
260272
}
261273
}
262274

@@ -268,7 +280,8 @@ export class ActivityGraphFillComputer {
268280
} of mutableFills.slice(1)) {
269281
for (let i = 0; i < perPixelContribution.length; i++) {
270282
accumulatedUpperEdge[i] =
271-
previousUpperEdge[i] + perPixelContribution[i] / 100;
283+
previousUpperEdge[i] +
284+
perPixelContribution[i] / (100 << FIXED_POINT_BITS);
272285
}
273286
previousUpperEdge = accumulatedUpperEdge;
274287
}
@@ -348,19 +361,25 @@ export class ActivityGraphFillComputer {
348361
const endPos = samplePosition;
349362
const cpuPercent = beforeSampleCpuPercent;
350363

351-
const intStartPos = startPos | 0;
352-
const intEndPos = endPos | 0;
364+
const intStartPos = startPos >> FIXED_POINT_BITS;
365+
const intEndPos = endPos >> FIXED_POINT_BITS;
353366

354367
if (intStartPos === intEndPos) {
355368
percentageBuffer[intStartPos] += cpuPercent * (endPos - startPos);
356369
} else {
357370
if (intStartPos + 1 < intEndPos) {
358-
percentageBuffer.fill(cpuPercent, intStartPos + 1, intEndPos);
371+
percentageBuffer.fill(
372+
cpuPercent << FIXED_POINT_BITS,
373+
intStartPos + 1,
374+
intEndPos
375+
);
359376
}
360377

378+
const startPosFrac = startPos & FIXED_POINT_MASK;
361379
percentageBuffer[intStartPos] +=
362-
cpuPercent * (1 - (startPos - intStartPos));
363-
percentageBuffer[intEndPos] += cpuPercent * (endPos - intEndPos);
380+
cpuPercent * ((1 << FIXED_POINT_BITS) - startPosFrac);
381+
const endPosFrac = endPos & FIXED_POINT_MASK;
382+
percentageBuffer[intEndPos] += cpuPercent * endPosFrac;
364383
}
365384
}
366385

@@ -369,19 +388,25 @@ export class ActivityGraphFillComputer {
369388
const endPos = halfwayPositionAfter;
370389
const cpuPercent = afterSampleCpuPercent;
371390

372-
const intStartPos = startPos | 0;
373-
const intEndPos = endPos | 0;
391+
const intStartPos = startPos >> FIXED_POINT_BITS;
392+
const intEndPos = endPos >> FIXED_POINT_BITS;
374393

375394
if (intStartPos === intEndPos) {
376395
percentageBuffer[intStartPos] += cpuPercent * (endPos - startPos);
377396
} else {
378397
if (intStartPos + 1 < intEndPos) {
379-
percentageBuffer.fill(cpuPercent, intStartPos + 1, intEndPos);
398+
percentageBuffer.fill(
399+
cpuPercent << FIXED_POINT_BITS,
400+
intStartPos + 1,
401+
intEndPos
402+
);
380403
}
381404

405+
const startPosFrac = startPos & FIXED_POINT_MASK;
382406
percentageBuffer[intStartPos] +=
383-
cpuPercent * (1 - (startPos - intStartPos));
384-
percentageBuffer[intEndPos] += cpuPercent * (endPos - intEndPos);
407+
cpuPercent * ((1 << FIXED_POINT_BITS) - startPosFrac);
408+
const endPosFrac = endPos & FIXED_POINT_MASK;
409+
percentageBuffer[intEndPos] += cpuPercent * endPosFrac;
385410
}
386411
}
387412

@@ -690,6 +715,7 @@ export class ActivityFillGraphQuerier {
690715
rangeFilteredThread: { samples },
691716
precomputedPositions: { samplePositions, halfwayPositions },
692717
} = this.renderedComponentSettings;
718+
693719
const halfwayPositionBefore = halfwayPositions[sample];
694720
const halfwayPositionAfter = halfwayPositions[sample + 1];
695721
const samplePosition = samplePositions[sample];
@@ -747,8 +773,8 @@ function _createSelectedPercentageAtPixelBuffers({
747773
}): SelectedPercentageAtPixelBuffers[] {
748774
return categoryDrawStyles.map(() => {
749775
const percentageBuffers = [];
750-
for (let i = 0; i < SELECTED_STATE_BUFFER_COUNT; i++) {
751-
percentageBuffers[i] = new Float32Array(canvasPixelWidth);
776+
// Allocate exactly SELECTED_STATE_BUFFER_COUNT zero-filled buffers, at
// indices 0..SELECTED_STATE_BUFFER_COUNT - 1 (presumably one per
// SelectedState value, as buffers are indexed by selected state). The bound
// is exclusive; an inclusive bound would allocate one surplus
// canvasPixelWidth-sized buffer per category.
for (let i = 0; i < SELECTED_STATE_BUFFER_COUNT; i++) {
  percentageBuffers[i] = new Int32Array(canvasPixelWidth);
}
753779
return percentageBuffers;
754780
});
@@ -834,32 +860,44 @@ function _accumulateHalfSampleToKernelSum(
834860
return SMOOTHING_KERNEL[indexInSmoothingKernel];
835861
}
836862

837-
const intStartPos = startPos | 0;
838-
const intEndPos = endPos | 0;
863+
const intStartPos = startPos >> FIXED_POINT_BITS;
864+
const intEndPos = endPos >> FIXED_POINT_BITS;
839865

840866
let sum = 0;
841867

842868
if (intStartPos === intEndPos) {
843-
sum += kernelVal(intStartPos) * cpuPercent * (endPos - startPos);
869+
const frac = (endPos - startPos) / (1 << FIXED_POINT_BITS);
870+
sum += kernelVal(intStartPos) * cpuPercent * frac;
844871
} else {
845872
if (intStartPos + 1 < intEndPos) {
846873
for (let i = intStartPos + 1; i < intEndPos; i++) {
847874
sum += kernelVal(i) * cpuPercent;
848875
}
849876
}
850877

851-
sum += kernelVal(intStartPos) * cpuPercent * (1 - (startPos - intStartPos));
852-
sum += kernelVal(intEndPos) * cpuPercent * (endPos - intEndPos);
878+
const startPosFrac =
879+
(startPos & FIXED_POINT_MASK) / (1 << FIXED_POINT_BITS);
880+
sum += kernelVal(intStartPos) * cpuPercent * (1 - startPosFrac);
881+
const endPosFrac = (endPos & FIXED_POINT_MASK) / (1 << FIXED_POINT_BITS);
882+
sum += kernelVal(intEndPos) * cpuPercent * endPosFrac;
853883
}
854884
return sum;
855885
}
856886

887+
// Minimal structural description of the typed array operations used by the
// blur helpers (_boxBlur1D and _applyGaussianBlur1D): indexed numeric access,
// `length`, `set` to copy values from another array of the same type, and
// `slice` to create a copy of the same type. T is the concrete array type
// itself, so that `slice` returns, and `set` accepts, that same type.
type TypedArray<T> = {
888+
readonly length: number;
889+
[n: number]: number;
890+
BYTES_PER_ELEMENT: number;
891+
set(array: T, offset?: number): void;
892+
slice(start?: number, end?: number): T;
893+
};
894+
857895
/**
858896
* Apply a 1d box blur to a destination array.
859897
*/
860-
function _boxBlur1D(
861-
srcArray: Float32Array,
862-
destArray: Float32Array,
898+
function _boxBlur1D<T extends TypedArray<T>>(
899+
srcArray: T,
900+
destArray: T,
863901
radius: number
864902
): void {
865903
if (srcArray.length < radius) {
@@ -892,12 +930,12 @@ function _boxBlur1D(
892930
/**
893931
* Apply a blur with a gaussian distribution to a destination array.
894932
*/
895-
function _applyGaussianBlur1D(
896-
srcArray: Float32Array<ArrayBuffer>,
933+
function _applyGaussianBlur1D<T extends TypedArray<T>>(
934+
srcArray: T,
897935
boxBlurRadii: number[]
898936
): void {
899937
let a = srcArray;
900-
let b = new Float32Array(srcArray.length);
938+
let b = srcArray.slice();
901939
for (const radius of boxBlurRadii) {
902940
_boxBlur1D(a, b, radius);
903941
[b, a] = [a, b];
@@ -907,8 +945,6 @@ function _applyGaussianBlur1D(
907945
// The last blur was applied to the temporary array, blit the final values back
908946
// to the srcArray. This ensures that we are always mutating the values of the
909947
// src array, and not returning the newly created array.
910-
for (let i = 0; i < srcArray.length; i++) {
911-
srcArray[i] = a[i];
912-
}
948+
srcArray.set(a);
913949
}
914950
}

0 commit comments

Comments
 (0)