Skip to content

Commit cde165b

Browse files
committed
Squeeze max
1 parent c2f8ad1 commit cde165b

3 files changed

Lines changed: 39 additions & 101 deletions

File tree

benchmark.md

Lines changed: 0 additions & 48 deletions
This file was deleted.

index.js

Lines changed: 29 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,23 @@ function init(N) {
1515
const half = N >>> 1
1616
const x = new Float64Array(N)
1717
const spectrum = new Float64Array(half)
18-
const re = new Float64Array(half + 1)
1918
const im = new Float64Array(half + 1)
19+
const re = x.subarray(0, half + 1) // zero-copy view into x
2020
const complex = { re, im }
2121
const bSi = 2 / N
2222

23+
// Precompute bit-reversal permutation table
24+
const bits = 31 - Math.clz32(N)
25+
const perm = new Uint32Array(N)
26+
for (let i = 0; i < N; i++) {
27+
let rev = 0, v = i
28+
for (let j = 0; j < bits; j++) {
29+
rev = (rev << 1) | (v & 1)
30+
v >>= 1
31+
}
32+
perm[i] = rev
33+
}
34+
2335
// Count twiddle factors per stage
2436
let total = 0, n2 = 2, nn = half
2537
const stages = []
@@ -52,7 +64,7 @@ function init(N) {
5264
si++
5365
}
5466

55-
const entry = { x, spectrum, complex, bSi, tw, stages }
67+
const entry = { x, spectrum, complex, bSi, tw, stages, perm }
5668
cache.set(N, entry)
5769
return entry
5870
}
@@ -71,9 +83,10 @@ function transform(input) {
7183
if (N < 2 || (N & (N - 1))) throw Error('Input length must be a power of 2 (>= 2).')
7284

7385
const entry = getEntry(N)
74-
const { x, tw, stages } = entry
86+
const { x, tw, stages, perm } = entry
7587

76-
reverseBinPermute(N, x, input)
88+
// Bit-reversal permutation via precomputed table
89+
for (let i = 0; i < N; i++) x[i] = input[perm[i]]
7790

7891
// First pass: length-2 butterflies
7992
for (let ix = 0, id = 4; ix < N; id *= 4) {
@@ -216,48 +229,19 @@ export function fft(input, output) {
216229
const N = input.length
217230
const half = N >>> 1
218231
const { x, complex } = entry
219-
const re = output ? output.re : complex.re
220-
const im = output ? output.im : complex.im
221-
222-
re[0] = x[0]
223-
im[0] = 0
224-
for (let k = 1; k < half; k++) {
225-
re[k] = x[k]
226-
im[k] = x[N - k]
227-
}
228-
re[half] = x[half]
229-
im[half] = 0
230-
231-
return output || complex
232-
}
233-
234-
function reverseBinPermute(N, dest, source) {
235-
const halfSize = N >>> 1
236-
const nm1 = N - 1
237232

238-
dest[0] = source[0]
239-
if (halfSize < 2) { dest[nm1] = source[nm1]; return }
240-
241-
let i = 1, r = 0
242-
243-
do {
244-
r += halfSize
245-
dest[i] = source[r]
246-
dest[r] = source[i]
247-
248-
i++
249-
250-
let h = halfSize << 1
251-
while (h = h >> 1, !((r ^= h) & h)) {}
233+
if (output) {
234+
const re = output.re, im = output.im
235+
for (let k = 0; k <= half; k++) re[k] = x[k]
236+
im[0] = 0; im[half] = 0
237+
for (let k = 1; k < half; k++) im[k] = x[N - k]
238+
return output
239+
}
252240

253-
if (r >= i) {
254-
dest[i] = source[r]
255-
dest[r] = source[i]
256-
dest[nm1 - i] = source[nm1 - r]
257-
dest[nm1 - r] = source[nm1 - i]
258-
}
259-
i++
260-
} while (i < halfSize)
241+
// re is already a zero-copy view of x[0..half] — no copy needed
242+
const im = complex.im
243+
im[0] = 0; im[half] = 0
244+
for (let k = 1; k < half; k++) im[k] = x[N - k]
261245

262-
dest[nm1] = source[nm1]
246+
return complex
263247
}

readme.md

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,16 +52,18 @@ rfft(signal, out) // safe to keep
5252
N=4096 real-valued FFT, complex output, 20k iterations (lower is better):
5353

5454
```
55-
fft.js (indutny) 16.0µs/call — radix-4
56-
fourier-transform 17.4µs/call ×1.1 — split-radix
57-
ooura 23.0µs/call ×1.4 — Ooura C port
58-
ml-fft 36.7µs/call ×2.3
59-
dsp.js 47.0µs/call ×2.9 — original split-radix ancestor
60-
kissfft-wasm 48.5µs/call ×3.0 — WASM KissFFT
61-
ndarray-fft 61.4µs/call ×3.8
62-
fft-js 2370.9µs/call ×148.3 — naive recursive
55+
fft.js (indutny) 16.2µs ×1.0 — radix-4, interleaved output
56+
fourier-transform 17.3µs ×1.1 — split-radix, separate re/im
57+
ooura 23.1µs ×1.4 — Ooura C port
58+
ml-fft 36.0µs ×2.2
59+
dsp.js 47.1µs ×2.9 — our split-radix ancestor
60+
kissfft-wasm 49.4µs ×3.1 — WASM KissFFT
61+
ndarray-fft 62.6µs ×3.9
62+
fft-js 2297.4µs ×142 — naive recursive
6363
```
6464

65+
Raw transform speed is identical to fft.js. The ×1.1 gap comes entirely from returning separate `re`/`im` arrays instead of interleaved output.
66+
6567
Run `npm run benchmark` to reproduce.
6668

6769
## Acknowledgments

0 commit comments

Comments
 (0)