Skip to content

Commit 77cdc72

Browse files
committed
Working radix-4 inverse
1 parent 1fcbf77 commit 77cdc72

9 files changed

Lines changed: 164 additions & 65 deletions

File tree

apps/typegpu-docs/src/examples/image-processing/camera-fft/index.ts

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,20 @@
1-
import { createFft2d, createStockhamRadix4LineStrategy, type Fft2d } from '@typegpu/fft';
1+
import { oklabGamutClipSlot, oklabToLinearRgb } from '@typegpu/color';
2+
import {
3+
createFft2d,
4+
createStockhamRadix2LineStrategy,
5+
createStockhamRadix4LineStrategy,
6+
type Fft2d,
7+
} from '@typegpu/fft';
28
import tgpu, { common, d, std } from 'typegpu';
39
import { defineControls } from '../../common/defineControls.ts';
410

511
/**
612
* Pipeline: camera → luminance (optional separable Hann window) → `encodeForward` → radial low-pass on the
7-
* spectrum buffer → optional `encodeInverse` (spatial) or log-magnitude spectrum.
8-
* Radix-2 Stockham by default; optional radix-4 line strategy. One compute pass chains fill, FFT, filter,
9-
* inverse FFT, and spatial/mag; then a render pass presents.
13+
* spectrum buffer → optional `encodeInverse` (spatial) or log-magnitude spectrum colored in **Oklab**
14+
* (lightness from magnitude, hue from complex phase via `a,b`).
15+
* Line FFT: **radix-4 (default)** (faster Stockham-style radix-4 + optional radix-2 tail) or **radix-2**
16+
* (pure Stockham radix-2). One compute pass chains fill, FFT, filter, inverse FFT, and spatial/mag; then a
17+
* render pass presents.
1018
*/
1119

1220
const WORKGROUP = 256;
@@ -48,9 +56,10 @@ function decomposeWorkgroups(total: number): [number, number, number] {
4856
/** Max longer side of the camera ROI before downscale; FFT pad is `nextPowerOf2(effW)×nextPowerOf2(effH)`. */
4957
let fftMaxSide = 1024;
5058

51-
type LineFftMode = 'default' | 'radix4';
59+
/** UI select values — must match `lineFft` control `options`. */
60+
type LineFftMode = 'radix-4 (default)' | 'radix-2';
5261

53-
let lineFftMode: LineFftMode = 'default';
62+
let lineFftMode: LineFftMode = 'radix-4 (default)';
5463
/** Tracks which line mode the current `fft` was built with (invalidate on change). */
5564
let fftLineFftMode: LineFftMode | undefined;
5665

@@ -213,6 +222,7 @@ const magKernel = tgpu.computeFn({
213222
numWorkgroups: d.builtin.numWorkgroups,
214223
},
215224
})((input) => {
225+
'use gpu';
216226
const wg = d.u32(WORKGROUP);
217227
const spanX = input.numWorkgroups.x * wg;
218228
const spanY = input.numWorkgroups.y * spanX;
@@ -242,7 +252,14 @@ const magKernel = tgpu.computeFn({
242252
const len = std.sqrt(cShift.x * cShift.x + cShift.y * cShift.y);
243253
const logv = std.log(1.0 + len) * magLayout.$.params.gain;
244254
const cv = std.clamp(logv, 0.0, 1.0);
245-
std.textureStore(magLayout.$.outTex, d.vec2u(xLin, yLin), d.vec4f(cv, cv, cv, 1));
255+
/** Oklab color: lightness and chroma both follow the clamped log-magnitude `cv`; phase → hue angle in the `a,b` plane. Chroma is forced to 0 when the magnitude is below `eps` (phase is ill-defined there). */
256+
const eps = 1e-8;
257+
const hue = std.atan2(cShift.y, cShift.x);
258+
const chroma = std.select(cv * 0.16, d.f32(0), len < eps);
259+
const L = 0.04 + cv * 0.88;
260+
const lab = d.vec3f(L, chroma * std.cos(hue), chroma * std.sin(hue));
261+
const rgb = oklabToLinearRgb(oklabGamutClipSlot.$(lab));
262+
std.textureStore(magLayout.$.outTex, d.vec2u(xLin, yLin), d.vec4f(rgb, 1));
246263
});
247264

248265
const spatialParamsType = d.struct({
@@ -368,8 +385,8 @@ if (navigator.mediaDevices.getUserMedia) {
368385
video.srcObject = await navigator.mediaDevices.getUserMedia({
369386
video: {
370387
facingMode: 'user',
371-
width: { ideal: 1280 },
372-
height: { ideal: 720 },
388+
width: { ideal: 1920 },
389+
height: { ideal: 1080 },
373390
frameRate: { ideal: 60 },
374391
},
375392
});
@@ -458,8 +475,8 @@ const renderPipeline = root.createRenderPipeline({
458475
});
459476

460477
/** When true: after forward FFT, run inverse and show grayscale spatial reconstruction. When false: show the log-magnitude spectrum (forward only), colored in Oklab with hue taken from the complex phase. */
461-
let applyInverseFft = true;
462-
let gainValue = 0.12;
478+
let applyInverseFft = false;
479+
let gainValue = 0.2;
463480
/** Normalized low-pass cutoff vs max toroidal radius (1 = no filtering). */
464481
let cutoffRadiusNorm = 1;
465482
/** Separable Hann window on camera ROI before FFT (reduces periodic-boundary cross in spectrum). */
@@ -535,12 +552,13 @@ function ensureResources(frameW: number, frameH: number) {
535552
destroyFftBlock();
536553
padW = nextPadW;
537554
padH = nextPadH;
538-
const lineFactory = lineFftMode === 'radix4' ? createStockhamRadix4LineStrategy : undefined;
555+
const lineFftStrategyFactory =
556+
lineFftMode === 'radix-2' ? createStockhamRadix2LineStrategy : createStockhamRadix4LineStrategy;
539557
fft = createFft2d(root, {
540558
width: padW,
541559
height: padH,
542560
skipFinalTranspose: SKIP_FINAL_FFT_TRANSPOSE,
543-
...(lineFactory !== undefined ? { lineFftStrategyFactory: lineFactory } : {}),
561+
lineFftStrategyFactory,
544562
});
545563
fftLineFftMode = lineFftMode;
546564
lastMagUniformKey = '';
@@ -785,7 +803,7 @@ videoFrameCallbackId = video.requestVideoFrameCallback(processVideoFrame);
785803

786804
export const controls = defineControls({
787805
inverseFft: {
788-
initial: true,
806+
initial: false,
789807
onToggleChange: (value) => {
790808
applyInverseFft = value;
791809
},
@@ -805,7 +823,7 @@ export const controls = defineControls({
805823
},
806824
lineFft: {
807825
initial: lineFftMode,
808-
options: ['radix4', 'default'],
826+
options: ['radix-4 (default)', 'radix-2'],
809827
onSelectChange: (value) => {
810828
lineFftMode = value as LineFftMode;
811829
},

apps/typegpu-docs/src/examples/tests/fft-line-strategy-check/index.ts

Lines changed: 29 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
/**
2-
* Validates @typegpu/fft line strategies: GPU↔CPU (fft.js separable 2D), radix-4 vs default Stockham,
2+
* Validates @typegpu/fft line strategies: GPU↔CPU (fft.js separable 2D), radix-4 vs radix-2 Stockham,
33
* and forward→inverse round-trip (WebGPU).
44
*
55
* Run in the docs dev server: `/TypeGPU/examples#example=tests--fft-line-strategy-check` — needs WebGPU.
66
*/
7-
import { createFft2d, createStockhamRadix4LineStrategy, type Fft2d } from '@typegpu/fft';
7+
import {
8+
createFft2d,
9+
createStockhamRadix2LineStrategy,
10+
createStockhamRadix4LineStrategy,
11+
type Fft2d,
12+
} from '@typegpu/fft';
813
import tgpu, { d } from 'typegpu';
914
import FFT from 'fft.js';
1015

@@ -140,7 +145,11 @@ const host = buildHostComplex();
140145
const cpuRef = hostToFloat64Interleaved(host);
141146
fft2dSeparableForward(cpuRef);
142147

143-
const fftDefault = createFft2d(root, { width: W, height: H });
148+
const fftRadix2 = createFft2d(root, {
149+
width: W,
150+
height: H,
151+
lineFftStrategyFactory: createStockhamRadix2LineStrategy,
152+
});
144153
const fftRadix4 = createFft2d(root, {
145154
width: W,
146155
height: H,
@@ -153,44 +162,44 @@ function loadAndForward(fft: Fft2d) {
153162
}
154163

155164
for (let i = 0; i < WARMUP; i++) {
156-
loadAndForward(fftDefault);
165+
loadAndForward(fftRadix2);
157166
loadAndForward(fftRadix4);
158167
}
159168

160-
loadAndForward(fftDefault);
169+
loadAndForward(fftRadix2);
161170
loadAndForward(fftRadix4);
162171

163-
const outDefault = (await fftDefault.output().read()) as Vec2Like[];
172+
const outRadix2 = (await fftRadix2.output().read()) as Vec2Like[];
164173
const outRadix4 = (await fftRadix4.output().read()) as Vec2Like[];
165174

166175
const parityR4 = diffStats(
167176
outRadix4,
168-
Float64Array.from(outDefault.flatMap((v) => [v.x, v.y])),
177+
Float64Array.from(outRadix2.flatMap((v) => [v.x, v.y])),
169178
1,
170179
);
171180
const parityR4Pass = parityR4.maxAbs < ERR_PASS_MAX && parityR4.rms < ERR_PASS_RMS;
172181
console.info(
173-
`[fft-line-strategy-check] radix-4 vs default Stockham: maxAbs=${parityR4.maxAbs.toExponential(3)} rms=${parityR4.rms.toExponential(3)}${parityR4Pass ? 'PASS' : 'FAIL'}`,
182+
`[fft-line-strategy-check] radix-4 vs radix-2 Stockham: maxAbs=${parityR4.maxAbs.toExponential(3)} rms=${parityR4.rms.toExponential(3)}${parityR4Pass ? 'PASS' : 'FAIL'}`,
174183
);
175184

176-
fftDefault.input.write(host);
177-
submitEncodeForward(device, fftDefault);
178-
submitEncodeInverse(device, fftDefault);
179-
const outDefaultRt = (await fftDefault.output().read()) as Vec2Like[];
185+
fftRadix2.input.write(host);
186+
submitEncodeForward(device, fftRadix2);
187+
submitEncodeInverse(device, fftRadix2);
188+
const outRadix2Rt = (await fftRadix2.output().read()) as Vec2Like[];
180189

181190
fftRadix4.input.write(host);
182191
submitEncodeForward(device, fftRadix4);
183192
submitEncodeInverse(device, fftRadix4);
184193
const outRadix4Rt = (await fftRadix4.output().read()) as Vec2Like[];
185194

186-
const rtParity = gpuGpuDiff(outRadix4Rt, outDefaultRt);
195+
const rtParity = gpuGpuDiff(outRadix4Rt, outRadix2Rt);
187196
const rtPass = rtParity.maxAbs < ERR_PASS_MAX && rtParity.rms < ERR_PASS_RMS;
188197
console.info(
189-
`[fft-line-strategy-check] forward→inverse round-trip radix-4 vs default: maxAbs=${rtParity.maxAbs.toExponential(3)} rms=${rtParity.rms.toExponential(3)}${rtPass ? 'PASS' : 'FAIL'}`,
198+
`[fft-line-strategy-check] forward→inverse round-trip radix-4 vs radix-2: maxAbs=${rtParity.maxAbs.toExponential(3)} rms=${rtParity.rms.toExponential(3)}${rtPass ? 'PASS' : 'FAIL'}`,
190199
);
191200

192201
let cpuScale = 1;
193-
let gpuCpu = diffStats(outDefault, cpuRef, cpuScale);
202+
let gpuCpu = diffStats(outRadix2, cpuRef, cpuScale);
194203
if (gpuCpu.maxAbs > ERR_PASS_MAX * 50) {
195204
const s =
196205
(() => {
@@ -199,16 +208,16 @@ if (gpuCpu.maxAbs > ERR_PASS_MAX * 50) {
199208
for (let i = 0; i < W * H; i++) {
200209
const cr = cpuRef[i * 2]!;
201210
const ci = cpuRef[i * 2 + 1]!;
202-
const gr = outDefault[i]!.x;
203-
const gi = outDefault[i]!.y;
211+
const gr = outRadix2[i]!.x;
212+
const gi = outRadix2[i]!.y;
204213
num += gr * cr + gi * ci;
205214
den += cr * cr + ci * ci;
206215
}
207216
return den > 1e-20 ? num / den : 1;
208217
})();
209218
if (Number.isFinite(s) && Math.abs(s - 1) > 0.01) {
210219
cpuScale = s;
211-
gpuCpu = diffStats(outDefault, cpuRef, cpuScale);
220+
gpuCpu = diffStats(outRadix2, cpuRef, cpuScale);
212221
console.info(
213222
`[fft-line-strategy-check] applied CPU ref scale=${cpuScale.toExponential(6)} (fft.js vs unnormalized Stockham)`,
214223
);
@@ -217,9 +226,9 @@ if (gpuCpu.maxAbs > ERR_PASS_MAX * 50) {
217226

218227
const refPass = gpuCpu.maxAbs < ERR_PASS_MAX && gpuCpu.rms < ERR_PASS_RMS;
219228
console.info(
220-
`[fft-line-strategy-check] GPU default vs CPU fft.js (2D separable): maxAbs=${gpuCpu.maxAbs.toExponential(3)} rms=${gpuCpu.rms.toExponential(3)}${refPass ? 'PASS' : 'FAIL'}`,
229+
`[fft-line-strategy-check] GPU radix-2 vs CPU fft.js (2D separable): maxAbs=${gpuCpu.maxAbs.toExponential(3)} rms=${gpuCpu.rms.toExponential(3)}${refPass ? 'PASS' : 'FAIL'}`,
221230
);
222231

223-
fftDefault.destroy();
232+
fftRadix2.destroy();
224233
fftRadix4.destroy();
225234
root.destroy();

packages/typegpu-fft/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"name": "@typegpu/fft",
33
"version": "0.10.0",
4-
"description": "2D GPU FFT for TypeGPU: Stockham radix-2 (default) and optional radix-4 line strategy.",
4+
"description": "2D GPU FFT for TypeGPU: Stockham-style radix-4 line FFT (default) and optional radix-2 reference path.",
55
"keywords": [
66
"webgpu",
77
"fft",

packages/typegpu-fft/src/fft2d.ts

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
import type { StorageFlag, TgpuBindGroup, TgpuBuffer, TgpuRoot, UniformFlag } from 'typegpu';
22
import { d } from 'typegpu';
3-
import {
4-
createStockhamRadix2LineStrategy,
5-
type LineFftEncodeOptions,
6-
type LineFftStrategyFactory,
7-
} from './lineFftStrategy.ts';
3+
import type { LineFftEncodeOptions, LineFftStrategyFactory } from './lineFftStrategy.ts';
4+
import { createStockhamRadix4LineStrategy } from './lineFftRadix4Strategy.ts';
85
import {
96
createTransposePipeline,
107
dispatchTranspose,
@@ -35,8 +32,9 @@ export type Fft2dOptions = {
3532
*/
3633
skipFinalTranspose?: boolean;
3734
/**
38-
* Pluggable 1D line FFT (Stockham along contiguous lines). Default: radix-2 Stockham via
39-
* {@link createStockhamRadix2LineStrategy}.
35+
* Pluggable 1D line FFT (Stockham-style along contiguous lines). Default: faster radix-4 + optional
36+
* radix-2 tail via {@link createStockhamRadix4LineStrategy}. Pass {@link createStockhamRadix2LineStrategy}
37+
* for pure radix-2 Stockham.
4038
*/
4139
lineFftStrategyFactory?: LineFftStrategyFactory;
4240
};
@@ -70,8 +68,9 @@ export type Fft2d = {
7068
};
7169

7270
/**
73-
* Radix-2 (default) or pluggable line-strategy 2D FFT on row-major `width×height` complex buffers
74-
* (`input` = `buffers[0]`). All GPU work is recorded on a caller-owned {@link GPUComputePassEncoder}.
71+
* 2D FFT on row-major `width×height` complex buffers (`input` = `buffers[0]`). Default line FFT is
72+
* radix-4 Stockham (Bainville) with an optional radix-2 tail; override with {@link Fft2dOptions.lineFftStrategyFactory}.
73+
* All GPU work is recorded on a caller-owned {@link GPUComputePassEncoder}.
7574
*
7675
* Per-stage uniform buffers are distinct so many dispatches can be recorded in one pass before `submit`
7776
* without last-write-wins. Line strategies use four duplicate uniform pools (slots `0`–`3`) for row/column
@@ -80,7 +79,7 @@ export type Fft2d = {
8079
export function createFft2d(root: TgpuRoot, options: Fft2dOptions): Fft2d {
8180
const { width: W, height: H } = options;
8281
const skipFinalTranspose = options.skipFinalTranspose === true;
83-
const lineFftFactory = options.lineFftStrategyFactory ?? createStockhamRadix2LineStrategy;
82+
const lineFftFactory = options.lineFftStrategyFactory ?? createStockhamRadix4LineStrategy;
8483

8584
if (W <= 0 || H <= 0) {
8685
throw new Error('FFT dimensions must be positive');

packages/typegpu-fft/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export { complexMul } from './complex.ts';
22
export { createFft2d, type Fft2d, type Fft2dOptions } from './fft2d.ts';
33
export { createStockhamRadix4LineStrategy } from './lineFftRadix4Strategy.ts';
4+
export { createStockhamRadix2LineStrategy } from './lineFftStrategy.ts';
45
export type { LineFftStrategyFactory } from './lineFftStrategy.ts';

packages/typegpu-fft/src/lineFftRadix4Strategy.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { d } from 'typegpu';
22
import {
33
buildStockhamTwiddleLut,
4+
createStockhamDifStagePipeline,
45
createStockhamStagePipeline,
56
stockhamLayout,
67
stockhamUniformType,
@@ -29,6 +30,7 @@ export function createStockhamRadix4LineStrategy(
2930
const radix4Pipeline = createRadix4StagePipeline(root);
3031
const radix4InversePipeline = createRadix4InverseStagePipeline(root);
3132
const stockhamPipeline = createStockhamStagePipeline(root);
33+
const stockhamDifPipeline = createStockhamDifStagePipeline(root);
3234

3335
const twiddleLutLen = nMax - 1;
3436
const twiddleLut = root.createBuffer(d.arrayOf(d.vec2f, twiddleLutLen)).$usage('storage');
@@ -98,6 +100,7 @@ export function createStockhamRadix4LineStrategy(
98100
radix4Pipeline,
99101
radix4InversePipeline,
100102
stockhamPipeline,
103+
stockhamDifPipeline,
101104
radix4Pools,
102105
n,
103106
lineStride,

packages/typegpu-fft/src/lineFftStrategy.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export type LineFftStrategyFactoryContext = {
3737
* Row and column passes in {@link createFft2d} both use this; transpose stays separate.
3838
*
3939
* **Contract:** `dispatchLineFft(..., { inverse: false })` and `{ inverse: true }` must implement the **same**
40-
* unnormalized complex DFT / IDFT pair as the default Stockham radix-2 path (same convention as {@link createFft2d}).
40+
* unnormalized complex DFT / IDFT pair as the reference Stockham radix-2 path ({@link createStockhamRadix2LineStrategy}).
4141
* A faster **forward** factorization (e.g. radix-4 stages) still defines one linear operator `F`; the inverse pass
4242
* must apply `F⁻¹`, not merely “reverse stages and conjugate twiddles” unless that has been shown equivalent.
4343
* It is valid for `inverse: true` to use a different **sequence** of kernels (e.g. full Stockham inverse) as long
@@ -69,8 +69,8 @@ export type LineFftStrategy = {
6969
export type LineFftStrategyFactory = (ctx: LineFftStrategyFactoryContext) => LineFftStrategy;
7070

7171
/**
72-
* Default: radix-2 Stockham stages (current @typegpu/fft behavior). Use as
73-
* `lineFftStrategyFactory: createStockhamRadix2LineStrategy` or rely on {@link createFft2d} default.
72+
* Pure radix-2 Stockham line FFT (reference implementation). {@link createFft2d} defaults to the faster
73+
* {@link createStockhamRadix4LineStrategy} instead; pass this factory to opt into radix-2 only.
7474
*/
7575
export function createStockhamRadix2LineStrategy(
7676
ctx: LineFftStrategyFactoryContext,

0 commit comments

Comments
 (0)