Skip to content

Commit 37dc6ff

Browse files
authored
Merge pull request #226 from githubnext/copilot/evolve-series-sortvalues
Series.sortValues: LSD radix sort on IEEE-754 transformed keys
2 parents 52a277f + b33273e commit 37dc6ff

1 file changed

Lines changed: 135 additions & 27 deletions

File tree

src/core/series.ts

Lines changed: 135 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,20 @@ function pearsonCorrFromArrays(
130130
return denom === 0 ? Number.NaN : num / denom;
131131
}
132132

133+
// ─── LSD radix sort buffers (module-level, grown lazily) ─────────────────────
134+
135+
/** Ping-pong index buffers for the 8-pass LSD radix sort numeric fast path. */
136+
let _rxA_idx: Uint32Array = new Uint32Array(0);
137+
let _rxB_idx: Uint32Array = new Uint32Array(0);
138+
/** Low 32 bits of each element's IEEE-754 sortable key (ping-pong). */
139+
let _rxA_lo: Uint32Array = new Uint32Array(0);
140+
let _rxB_lo: Uint32Array = new Uint32Array(0);
141+
/** High 32 bits of each element's IEEE-754 sortable key (ping-pong). */
142+
let _rxA_hi: Uint32Array = new Uint32Array(0);
143+
let _rxB_hi: Uint32Array = new Uint32Array(0);
144+
/** 256-bucket histogram reused every pass (never reallocated). */
145+
const _rxCnt: Uint32Array = new Uint32Array(256);
146+
133147
// ─── SeriesOptions ────────────────────────────────────────────────────────────
134148

135149
/** Constructor options accepted by `Series`. */
@@ -716,8 +730,7 @@ export class Series<T extends Scalar = Scalar> {
716730
const vals = this._values;
717731

718732
// Pre-partition NaN/null/undefined from finite values in one pass.
719-
// fvals stores numeric values by original row index so the sort comparator
720-
// can read a typed Float64Array (not a generic T[]) at index a/b.
733+
// fvals stores numeric values keyed by original row index (fvals[origIdx]); slots for missing or non-numeric rows are never written and stay 0.
721734
const finBuf = new Uint32Array(n);
722735
const nanBuf = new Uint32Array(n);
723736
const fvals = new Float64Array(n);
@@ -727,41 +740,132 @@ export class Series<T extends Scalar = Scalar> {
727740
for (let i = 0; i < n; i++) {
728741
const v = vals[i];
729742
if (v === null || v === undefined || (typeof v === "number" && Number.isNaN(v))) {
730-
nanBuf[nanCount++] = i;
743+
nanBuf[nanCount] = i;
744+
nanCount = nanCount + 1;
731745
} else {
732746
if (typeof v === "number") {
733747
fvals[i] = v;
734748
} else {
735749
allNumeric = false;
736750
}
737-
finBuf[finCount++] = i;
751+
finBuf[finCount] = i;
752+
finCount = finCount + 1;
738753
}
739754
}
740755

741-
// Sort the finite-index slice in-place.
742-
// For all-numeric data use the Float64Array subtraction comparator —
743-
// monomorphic, branchless, and JIT-specialisable.
744-
// For mixed/string data fall back to the generic branch comparator.
745756
const finSlice = finBuf.subarray(0, finCount);
746-
if (allNumeric) {
757+
758+
if (allNumeric && finCount > 0) {
759+
// ── LSD radix sort: 8 passes × 8 bits over IEEE-754 transformed keys ──
760+
// Eliminates all JS comparator callbacks (the bottleneck at n≥10k).
761+
762+
// Grow module-level ping-pong buffers if needed.
763+
if (_rxA_idx.length < finCount) {
764+
_rxA_idx = new Uint32Array(finCount);
765+
_rxB_idx = new Uint32Array(finCount);
766+
_rxA_lo = new Uint32Array(finCount);
767+
_rxB_lo = new Uint32Array(finCount);
768+
_rxA_hi = new Uint32Array(finCount);
769+
_rxB_hi = new Uint32Array(finCount);
770+
}
771+
772+
// fvals is a Float64Array; reinterpret its buffer as Uint32 to read raw bits.
773+
// Typed-array views use the host's byte order; this assumes little-endian (x86/ARM), where u32[2i] = lo 32 bits and u32[2i+1] = hi 32 bits.
774+
const fvalsU32 = new Uint32Array(fvals.buffer);
775+
776+
// Initialise ping arrays with identity indices and IEEE-754 sort keys.
777+
// Transform: sign bit clear (+0 and positive floats) → flip only the sign bit; sign bit set (negative floats, including -0) → flip all 64 bits.
778+
// This maps floats to an unsigned integer order that matches numeric order (with -0 ordered just before +0).
779+
for (let i = 0; i < finCount; i++) {
780+
const origIdx = finSlice[i]!;
781+
_rxA_idx[i] = origIdx;
782+
let lo = fvalsU32[origIdx * 2]!;
783+
let hi = fvalsU32[origIdx * 2 + 1]!;
784+
if (hi & 0x80000000) {
785+
lo = ~lo >>> 0;
786+
hi = ~hi >>> 0;
787+
} else {
788+
hi = (hi ^ 0x80000000) >>> 0;
789+
}
790+
_rxA_lo[i] = lo;
791+
_rxA_hi[i] = hi;
792+
}
793+
794+
// 8-pass LSD: passes 0–3 over lo word, passes 4–7 over hi word.
795+
let srcIdx = _rxA_idx;
796+
let dstIdx = _rxB_idx;
797+
let srcLo = _rxA_lo;
798+
let dstLo = _rxB_lo;
799+
let srcHi = _rxA_hi;
800+
let dstHi = _rxB_hi;
801+
802+
for (let pass = 0; pass < 8; pass++) {
803+
// Build histogram for this byte.
804+
_rxCnt.fill(0);
805+
const useHi = pass >= 4;
806+
const shift = (pass % 4) * 8;
807+
for (let i = 0; i < finCount; i++) {
808+
const word = useHi ? srcHi[i]! : srcLo[i]!;
809+
const bucket = (word >>> shift) & 0xff;
810+
const c = _rxCnt[bucket]!;
811+
_rxCnt[bucket] = c + 1;
812+
}
813+
// Prefix sum → scatter offsets.
814+
let total = 0;
815+
for (let b = 0; b < 256; b++) {
816+
const c = _rxCnt[b]!;
817+
_rxCnt[b] = total;
818+
total = total + c;
819+
}
820+
// Scatter elements into destination.
821+
for (let i = 0; i < finCount; i++) {
822+
const word = useHi ? srcHi[i]! : srcLo[i]!;
823+
const bucket = (word >>> shift) & 0xff;
824+
const p = _rxCnt[bucket]!;
825+
_rxCnt[bucket] = p + 1;
826+
dstIdx[p] = srcIdx[i]!;
827+
dstLo[p] = srcLo[i]!;
828+
dstHi[p] = srcHi[i]!;
829+
}
830+
// Swap ping-pong references.
831+
const ti = srcIdx;
832+
srcIdx = dstIdx;
833+
dstIdx = ti;
834+
const tl = srcLo;
835+
srcLo = dstLo;
836+
dstLo = tl;
837+
const th = srcHi;
838+
srcHi = dstHi;
839+
dstHi = th;
840+
}
841+
842+
// After 8 passes (an even number of ping-pong swaps) srcIdx refers to _rxA_idx again and holds the original indices in ascending value order.
747843
if (ascending) {
748-
finSlice.sort((a, b) => fvals[a]! - fvals[b]!);
844+
for (let i = 0; i < finCount; i++) {
845+
finSlice[i] = srcIdx[i]!;
846+
}
749847
} else {
750-
finSlice.sort((a, b) => fvals[b]! - fvals[a]!);
848+
for (let i = 0, j = finCount - 1; i < finCount; i = i + 1, j = j - 1) {
849+
finSlice[i] = srcIdx[j]!;
850+
}
851+
}
852+
} else if (!allNumeric) {
853+
// String / mixed dtype: fall back to comparator-based sort.
854+
if (ascending) {
855+
finSlice.sort((a, b) => {
856+
const av = vals[a] as number | string | boolean;
857+
const bv = vals[b] as number | string | boolean;
858+
return av < bv ? -1 : av > bv ? 1 : 0;
859+
});
860+
} else {
861+
finSlice.sort((a, b) => {
862+
const av = vals[a] as number | string | boolean;
863+
const bv = vals[b] as number | string | boolean;
864+
return av > bv ? -1 : av < bv ? 1 : 0;
865+
});
751866
}
752-
} else if (ascending) {
753-
finSlice.sort((a, b) => {
754-
const av = vals[a] as number | string | boolean;
755-
const bv = vals[b] as number | string | boolean;
756-
return av < bv ? -1 : av > bv ? 1 : 0;
757-
});
758-
} else {
759-
finSlice.sort((a, b) => {
760-
const av = vals[a] as number | string | boolean;
761-
const bv = vals[b] as number | string | boolean;
762-
return av > bv ? -1 : av < bv ? 1 : 0;
763-
});
764867
}
868+
// else: allNumeric && finCount === 0 — nothing to sort.
765869

766870
// Build the output permutation and gather values in a single pass.
767871
const perm = new Array<number>(n);
@@ -771,23 +875,27 @@ export class Series<T extends Scalar = Scalar> {
771875
for (let i = 0; i < nanCount; i++) {
772876
const idx = nanBuf[i]!;
773877
perm[pos] = idx;
774-
outData[pos++] = vals[idx] as T;
878+
outData[pos] = vals[idx] as T;
879+
pos = pos + 1;
775880
}
776881
for (let i = 0; i < finCount; i++) {
777882
const idx = finSlice[i]!;
778883
perm[pos] = idx;
779-
outData[pos++] = vals[idx] as T;
884+
outData[pos] = vals[idx] as T;
885+
pos = pos + 1;
780886
}
781887
} else {
782888
for (let i = 0; i < finCount; i++) {
783889
const idx = finSlice[i]!;
784890
perm[pos] = idx;
785-
outData[pos++] = vals[idx] as T;
891+
outData[pos] = vals[idx] as T;
892+
pos = pos + 1;
786893
}
787894
for (let i = 0; i < nanCount; i++) {
788895
const idx = nanBuf[i]!;
789896
perm[pos] = idx;
790-
outData[pos++] = vals[idx] as T;
897+
outData[pos] = vals[idx] as T;
898+
pos = pos + 1;
791899
}
792900
}
793901

0 commit comments

Comments
 (0)