Working radix-4 inverse

deluksic · deluksic · commit 77cdc7278bc4 · 2026-03-23T01:50:15.000+01:00
diff --git a/apps/typegpu-docs/src/examples/image-processing/camera-fft/index.ts b/apps/typegpu-docs/src/examples/image-processing/camera-fft/index.ts
@@ -1,12 +1,20 @@
-import { createFft2d, createStockhamRadix4LineStrategy, type Fft2d } from '@typegpu/fft';
+import { oklabGamutClipSlot, oklabToLinearRgb } from '@typegpu/color';
+import {
+  createFft2d,
+  createStockhamRadix2LineStrategy,
+  createStockhamRadix4LineStrategy,
+  type Fft2d,
+} from '@typegpu/fft';
 import tgpu, { common, d, std } from 'typegpu';
 import { defineControls } from '../../common/defineControls.ts';
 
 /**
  * Pipeline: camera → luminance (optional separable Hann window) → `encodeForward` → radial low-pass on the
- * spectrum buffer → optional `encodeInverse` (spatial) or log-magnitude spectrum.
- * Radix-2 Stockham by default; optional radix-4 line strategy. One compute pass chains fill, FFT, filter,
- * inverse FFT, and spatial/mag; then a render pass presents.
+ * spectrum buffer → optional `encodeInverse` (spatial) or log-magnitude spectrum colored in **Oklab**
+ * (lightness from magnitude, hue from complex phase via `a,b`).
+ * Line FFT: **radix-4 (default)** (faster Stockham-style radix-4 + optional radix-2 tail) or **radix-2**
+ * (pure Stockham radix-2). One compute pass chains fill, FFT, filter, inverse FFT, and spatial/mag; then a
+ * render pass presents.
  */
 
 const WORKGROUP = 256;
@@ -48,9 +56,10 @@ function decomposeWorkgroups(total: number): [number, number, number] {
 /** Max longer side of the camera ROI before downscale; FFT pad is `nextPowerOf2(effW)×nextPowerOf2(effH)`. */
 let fftMaxSide = 1024;
 
-type LineFftMode = 'default' | 'radix4';
+/** UI select values — must match `lineFft` control `options`. */
+type LineFftMode = 'radix-4 (default)' | 'radix-2';
 
-let lineFftMode: LineFftMode = 'default';
+let lineFftMode: LineFftMode = 'radix-4 (default)';
 /** Tracks which line mode the current `fft` was built with (invalidate on change). */
 let fftLineFftMode: LineFftMode | undefined;
 
@@ -213,6 +222,7 @@ const magKernel = tgpu.computeFn({
     numWorkgroups: d.builtin.numWorkgroups,
   },
 })((input) => {
+  'use gpu';
   const wg = d.u32(WORKGROUP);
   const spanX = input.numWorkgroups.x * wg;
   const spanY = input.numWorkgroups.y * spanX;
@@ -242,7 +252,14 @@ const magKernel = tgpu.computeFn({
   const len = std.sqrt(cShift.x * cShift.x + cShift.y * cShift.y);
   const logv = std.log(1.0 + len) * magLayout.$.params.gain;
   const cv = std.clamp(logv, 0.0, 1.0);
-  std.textureStore(magLayout.$.outTex, d.vec2u(xLin, yLin), d.vec4f(cv, cv, cv, 1));
+  /** Perceptual lightness from log-magnitude; chroma scales with magnitude; phase → hue in the `a,b` plane. */
+  const eps = 1e-8;
+  const hue = std.atan2(cShift.y, cShift.x);
+  const chroma = std.select(cv * 0.16, d.f32(0), len < eps);
+  const L = 0.04 + cv * 0.88;
+  const lab = d.vec3f(L, chroma * std.cos(hue), chroma * std.sin(hue));
+  const rgb = oklabToLinearRgb(oklabGamutClipSlot.$(lab));
+  std.textureStore(magLayout.$.outTex, d.vec2u(xLin, yLin), d.vec4f(rgb, 1));
 });
 
 const spatialParamsType = d.struct({
@@ -368,8 +385,8 @@ if (navigator.mediaDevices.getUserMedia) {
   video.srcObject = await navigator.mediaDevices.getUserMedia({
     video: {
       facingMode: 'user',
-      width: { ideal: 1280 },
-      height: { ideal: 720 },
+      width: { ideal: 1920 },
+      height: { ideal: 1080 },
       frameRate: { ideal: 60 },
     },
   });
@@ -458,8 +475,8 @@ const renderPipeline = root.createRenderPipeline({
 });
 
 /** When true: after forward FFT, run inverse and show grayscale spatial reconstruction. When false: show log-magnitude spectrum (forward only). */
-let applyInverseFft = true;
-let gainValue = 0.12;
+let applyInverseFft = false;
+let gainValue = 0.2;
 /** Normalized low-pass cutoff vs max toroidal radius (1 = no filtering). */
 let cutoffRadiusNorm = 1;
 /** Separable Hann window on camera ROI before FFT (reduces periodic-boundary cross in spectrum). */
@@ -535,12 +552,13 @@ function ensureResources(frameW: number, frameH: number) {
     destroyFftBlock();
     padW = nextPadW;
     padH = nextPadH;
-    const lineFactory = lineFftMode === 'radix4' ? createStockhamRadix4LineStrategy : undefined;
+    const lineFftStrategyFactory =
+      lineFftMode === 'radix-2' ? createStockhamRadix2LineStrategy : createStockhamRadix4LineStrategy;
     fft = createFft2d(root, {
       width: padW,
       height: padH,
       skipFinalTranspose: SKIP_FINAL_FFT_TRANSPOSE,
-      ...(lineFactory !== undefined ? { lineFftStrategyFactory: lineFactory } : {}),
+      lineFftStrategyFactory,
     });
     fftLineFftMode = lineFftMode;
     lastMagUniformKey = '';
@@ -785,7 +803,7 @@ videoFrameCallbackId = video.requestVideoFrameCallback(processVideoFrame);
 
 export const controls = defineControls({
   inverseFft: {
-    initial: true,
+    initial: false,
     onToggleChange: (value) => {
       applyInverseFft = value;
     },
@@ -805,7 +823,7 @@ export const controls = defineControls({
   },
   lineFft: {
     initial: lineFftMode,
-    options: ['radix4', 'default'],
+    options: ['radix-4 (default)', 'radix-2'],
     onSelectChange: (value) => {
       lineFftMode = value as LineFftMode;
     },
diff --git a/apps/typegpu-docs/src/examples/tests/fft-line-strategy-check/index.ts b/apps/typegpu-docs/src/examples/tests/fft-line-strategy-check/index.ts
@@ -1,10 +1,15 @@
 /**
- * Validates @typegpu/fft line strategies: GPU↔CPU (fft.js separable 2D), radix-4 vs default Stockham,
+ * Validates @typegpu/fft line strategies: GPU↔CPU (fft.js separable 2D), radix-4 vs radix-2 Stockham,
  * and forward→inverse round-trip (WebGPU).
  *
  * Run in the docs dev server: `/TypeGPU/examples#example=tests--fft-line-strategy-check` — needs WebGPU.
  */
-import { createFft2d, createStockhamRadix4LineStrategy, type Fft2d } from '@typegpu/fft';
+import {
+  createFft2d,
+  createStockhamRadix2LineStrategy,
+  createStockhamRadix4LineStrategy,
+  type Fft2d,
+} from '@typegpu/fft';
 import tgpu, { d } from 'typegpu';
 import FFT from 'fft.js';
 
@@ -140,7 +145,11 @@ const host = buildHostComplex();
 const cpuRef = hostToFloat64Interleaved(host);
 fft2dSeparableForward(cpuRef);
 
-const fftDefault = createFft2d(root, { width: W, height: H });
+const fftRadix2 = createFft2d(root, {
+  width: W,
+  height: H,
+  lineFftStrategyFactory: createStockhamRadix2LineStrategy,
+});
 const fftRadix4 = createFft2d(root, {
   width: W,
   height: H,
@@ -153,44 +162,44 @@ function loadAndForward(fft: Fft2d) {
 }
 
 for (let i = 0; i < WARMUP; i++) {
-  loadAndForward(fftDefault);
+  loadAndForward(fftRadix2);
   loadAndForward(fftRadix4);
 }
 
-loadAndForward(fftDefault);
+loadAndForward(fftRadix2);
 loadAndForward(fftRadix4);
 
-const outDefault = (await fftDefault.output().read()) as Vec2Like[];
+const outRadix2 = (await fftRadix2.output().read()) as Vec2Like[];
 const outRadix4 = (await fftRadix4.output().read()) as Vec2Like[];
 
 const parityR4 = diffStats(
   outRadix4,
-  Float64Array.from(outDefault.flatMap((v) => [v.x, v.y])),
+  Float64Array.from(outRadix2.flatMap((v) => [v.x, v.y])),
   1,
 );
 const parityR4Pass = parityR4.maxAbs < ERR_PASS_MAX && parityR4.rms < ERR_PASS_RMS;
 console.info(
-  `[fft-line-strategy-check] radix-4 vs default Stockham: maxAbs=${parityR4.maxAbs.toExponential(3)} rms=${parityR4.rms.toExponential(3)} → ${parityR4Pass ? 'PASS' : 'FAIL'}`,
+  `[fft-line-strategy-check] radix-4 vs radix-2 Stockham: maxAbs=${parityR4.maxAbs.toExponential(3)} rms=${parityR4.rms.toExponential(3)} → ${parityR4Pass ? 'PASS' : 'FAIL'}`,
 );
 
-fftDefault.input.write(host);
-submitEncodeForward(device, fftDefault);
-submitEncodeInverse(device, fftDefault);
-const outDefaultRt = (await fftDefault.output().read()) as Vec2Like[];
+fftRadix2.input.write(host);
+submitEncodeForward(device, fftRadix2);
+submitEncodeInverse(device, fftRadix2);
+const outRadix2Rt = (await fftRadix2.output().read()) as Vec2Like[];
 
 fftRadix4.input.write(host);
 submitEncodeForward(device, fftRadix4);
 submitEncodeInverse(device, fftRadix4);
 const outRadix4Rt = (await fftRadix4.output().read()) as Vec2Like[];
 
-const rtParity = gpuGpuDiff(outRadix4Rt, outDefaultRt);
+const rtParity = gpuGpuDiff(outRadix4Rt, outRadix2Rt);
 const rtPass = rtParity.maxAbs < ERR_PASS_MAX && rtParity.rms < ERR_PASS_RMS;
 console.info(
-  `[fft-line-strategy-check] forward→inverse round-trip radix-4 vs default: maxAbs=${rtParity.maxAbs.toExponential(3)} rms=${rtParity.rms.toExponential(3)} → ${rtPass ? 'PASS' : 'FAIL'}`,
+  `[fft-line-strategy-check] forward→inverse round-trip radix-4 vs radix-2: maxAbs=${rtParity.maxAbs.toExponential(3)} rms=${rtParity.rms.toExponential(3)} → ${rtPass ? 'PASS' : 'FAIL'}`,
 );
 
 let cpuScale = 1;
-let gpuCpu = diffStats(outDefault, cpuRef, cpuScale);
+let gpuCpu = diffStats(outRadix2, cpuRef, cpuScale);
 if (gpuCpu.maxAbs > ERR_PASS_MAX * 50) {
   const s =
     (() => {
@@ -199,16 +208,16 @@ if (gpuCpu.maxAbs > ERR_PASS_MAX * 50) {
       for (let i = 0; i < W * H; i++) {
         const cr = cpuRef[i * 2]!;
         const ci = cpuRef[i * 2 + 1]!;
-        const gr = outDefault[i]!.x;
-        const gi = outDefault[i]!.y;
+        const gr = outRadix2[i]!.x;
+        const gi = outRadix2[i]!.y;
         num += gr * cr + gi * ci;
         den += cr * cr + ci * ci;
       }
       return den > 1e-20 ? num / den : 1;
     })();
   if (Number.isFinite(s) && Math.abs(s - 1) > 0.01) {
     cpuScale = s;
-    gpuCpu = diffStats(outDefault, cpuRef, cpuScale);
+    gpuCpu = diffStats(outRadix2, cpuRef, cpuScale);
     console.info(
       `[fft-line-strategy-check] applied CPU ref scale=${cpuScale.toExponential(6)} (fft.js vs unnormalized Stockham)`,
     );
@@ -217,9 +226,9 @@ if (gpuCpu.maxAbs > ERR_PASS_MAX * 50) {
 
 const refPass = gpuCpu.maxAbs < ERR_PASS_MAX && gpuCpu.rms < ERR_PASS_RMS;
 console.info(
-  `[fft-line-strategy-check] GPU default vs CPU fft.js (2D separable): maxAbs=${gpuCpu.maxAbs.toExponential(3)} rms=${gpuCpu.rms.toExponential(3)} → ${refPass ? 'PASS' : 'FAIL'}`,
+  `[fft-line-strategy-check] GPU radix-2 vs CPU fft.js (2D separable): maxAbs=${gpuCpu.maxAbs.toExponential(3)} rms=${gpuCpu.rms.toExponential(3)} → ${refPass ? 'PASS' : 'FAIL'}`,
 );
 
-fftDefault.destroy();
+fftRadix2.destroy();
 fftRadix4.destroy();
 root.destroy();
diff --git a/packages/typegpu-fft/package.json b/packages/typegpu-fft/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@typegpu/fft",
   "version": "0.10.0",
-  "description": "2D GPU FFT for TypeGPU: Stockham radix-2 (default) and optional radix-4 line strategy.",
+  "description": "2D GPU FFT for TypeGPU: Stockham-style radix-4 line FFT (default) and optional radix-2 reference path.",
   "keywords": [
     "webgpu",
     "fft",
diff --git a/packages/typegpu-fft/src/fft2d.ts b/packages/typegpu-fft/src/fft2d.ts
@@ -1,10 +1,7 @@
 import type { StorageFlag, TgpuBindGroup, TgpuBuffer, TgpuRoot, UniformFlag } from 'typegpu';
 import { d } from 'typegpu';
-import {
-  createStockhamRadix2LineStrategy,
-  type LineFftEncodeOptions,
-  type LineFftStrategyFactory,
-} from './lineFftStrategy.ts';
+import type { LineFftEncodeOptions, LineFftStrategyFactory } from './lineFftStrategy.ts';
+import { createStockhamRadix4LineStrategy } from './lineFftRadix4Strategy.ts';
 import {
   createTransposePipeline,
   dispatchTranspose,
@@ -35,8 +32,9 @@ export type Fft2dOptions = {
    */
   skipFinalTranspose?: boolean;
   /**
-   * Pluggable 1D line FFT (Stockham along contiguous lines). Default: radix-2 Stockham via
-   * {@link createStockhamRadix2LineStrategy}.
+   * Pluggable 1D line FFT (Stockham-style along contiguous lines). Default: faster radix-4 + optional
+   * radix-2 tail via {@link createStockhamRadix4LineStrategy}. Pass {@link createStockhamRadix2LineStrategy}
+   * for pure radix-2 Stockham.
    */
   lineFftStrategyFactory?: LineFftStrategyFactory;
 };
@@ -70,8 +68,9 @@ export type Fft2d = {
 };
 
 /**
- * Radix-2 (default) or pluggable line-strategy 2D FFT on row-major `width×height` complex buffers
- * (`input` = `buffers[0]`). All GPU work is recorded on a caller-owned {@link GPUComputePassEncoder}.
+ * 2D FFT on row-major `width×height` complex buffers (`input` = `buffers[0]`). Default line FFT is
+ * radix-4 Stockham (Bainville) with an optional radix-2 tail; override with {@link Fft2dOptions.lineFftStrategyFactory}.
+ * All GPU work is recorded on a caller-owned {@link GPUComputePassEncoder}.
  *
  * Per-stage uniform buffers are distinct so many dispatches can be recorded in one pass before `submit`
  * without last-write-wins. Line strategies use four duplicate uniform pools (slots `0`–`3`) for row/column
@@ -80,7 +79,7 @@ export type Fft2d = {
 export function createFft2d(root: TgpuRoot, options: Fft2dOptions): Fft2d {
   const { width: W, height: H } = options;
   const skipFinalTranspose = options.skipFinalTranspose === true;
-  const lineFftFactory = options.lineFftStrategyFactory ?? createStockhamRadix2LineStrategy;
+  const lineFftFactory = options.lineFftStrategyFactory ?? createStockhamRadix4LineStrategy;
 
   if (W <= 0 || H <= 0) {
     throw new Error('FFT dimensions must be positive');
diff --git a/packages/typegpu-fft/src/index.ts b/packages/typegpu-fft/src/index.ts
@@ -1,4 +1,5 @@
 export { complexMul } from './complex.ts';
 export { createFft2d, type Fft2d, type Fft2dOptions } from './fft2d.ts';
 export { createStockhamRadix4LineStrategy } from './lineFftRadix4Strategy.ts';
+export { createStockhamRadix2LineStrategy } from './lineFftStrategy.ts';
 export type { LineFftStrategyFactory } from './lineFftStrategy.ts';
diff --git a/packages/typegpu-fft/src/lineFftRadix4Strategy.ts b/packages/typegpu-fft/src/lineFftRadix4Strategy.ts
@@ -1,6 +1,7 @@
 import { d } from 'typegpu';
 import {
   buildStockhamTwiddleLut,
+  createStockhamDifStagePipeline,
   createStockhamStagePipeline,
   stockhamLayout,
   stockhamUniformType,
@@ -29,6 +30,7 @@ export function createStockhamRadix4LineStrategy(
   const radix4Pipeline = createRadix4StagePipeline(root);
   const radix4InversePipeline = createRadix4InverseStagePipeline(root);
   const stockhamPipeline = createStockhamStagePipeline(root);
+  const stockhamDifPipeline = createStockhamDifStagePipeline(root);
 
   const twiddleLutLen = nMax - 1;
   const twiddleLut = root.createBuffer(d.arrayOf(d.vec2f, twiddleLutLen)).$usage('storage');
@@ -98,6 +100,7 @@ export function createStockhamRadix4LineStrategy(
         radix4Pipeline,
         radix4InversePipeline,
         stockhamPipeline,
+        stockhamDifPipeline,
         radix4Pools,
         n,
         lineStride,
diff --git a/packages/typegpu-fft/src/lineFftStrategy.ts b/packages/typegpu-fft/src/lineFftStrategy.ts
@@ -37,7 +37,7 @@ export type LineFftStrategyFactoryContext = {
  * Row and column passes in {@link createFft2d} both use this; transpose stays separate.
  *
  * **Contract:** `dispatchLineFft(..., { inverse: false })` and `{ inverse: true }` must implement the **same**
- * unnormalized complex DFT / IDFT pair as the default Stockham radix-2 path (same convention as {@link createFft2d}).
+ * unnormalized complex DFT / IDFT pair as the reference Stockham radix-2 path ({@link createStockhamRadix2LineStrategy}).
  * A faster **forward** factorization (e.g. radix-4 stages) still defines one linear operator `F`; the inverse pass
  * must apply `F⁻¹`, not merely “reverse stages and conjugate twiddles” unless that has been shown equivalent.
  * It is valid for `inverse: true` to use a different **sequence** of kernels (e.g. full Stockham inverse) as long
@@ -69,8 +69,8 @@ export type LineFftStrategy = {
 export type LineFftStrategyFactory = (ctx: LineFftStrategyFactoryContext) => LineFftStrategy;
 
 /**
- * Default: radix-2 Stockham stages (current @typegpu/fft behavior). Use as
- * `lineFftStrategyFactory: createStockhamRadix2LineStrategy` or rely on {@link createFft2d} default.
+ * Pure radix-2 Stockham line FFT (reference implementation). {@link createFft2d} defaults to the faster
+ * {@link createStockhamRadix4LineStrategy} instead; pass this factory to opt into radix-2 only.
  */
 export function createStockhamRadix2LineStrategy(
   ctx: LineFftStrategyFactoryContext,
diff --git a/packages/typegpu-fft/src/stockham.ts b/packages/typegpu-fft/src/stockham.ts
diff --git a/packages/typegpu-fft/src/stockhamRadix4.ts b/packages/typegpu-fft/src/stockhamRadix4.ts

Original file line number	Diff line number	Diff line change
`@@ -1,7 +1,7 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "@typegpu/fft",`
`3`	`3`	`"version": "0.10.0",`
`4`		`- "description": "2D GPU FFT for TypeGPU: Stockham radix-2 (default) and optional radix-4 line strategy.",`
	`4`	`+ "description": "2D GPU FFT for TypeGPU: Stockham-style radix-4 line FFT (default) and optional radix-2 reference path.",`
`5`	`5`	`"keywords": [`
`6`	`6`	`"webgpu",`
`7`	`7`	`"fft",`