@@ -130,6 +130,20 @@ function pearsonCorrFromArrays(
130130 return denom === 0 ? Number . NaN : num / denom ;
131131}
132132
133+ // ─── LSD radix sort buffers (module-level, grown lazily) ─────────────────────
134+
135+ /** Ping-pong buffers of original row indices for the 8-pass LSD radix sort (numeric fast path); they start empty and are replaced by larger arrays when a sort needs more slots. */
136+ let _rxA_idx : Uint32Array = new Uint32Array ( 0 ) ;
137+ let _rxB_idx : Uint32Array = new Uint32Array ( 0 ) ;
138+ /** Low 32 bits of each element's transformed (order-preserving) IEEE-754 sort key; ping-pong pair scattered in step with the index buffers. */
139+ let _rxA_lo : Uint32Array = new Uint32Array ( 0 ) ;
140+ let _rxB_lo : Uint32Array = new Uint32Array ( 0 ) ;
141+ /** High 32 bits of each element's transformed (order-preserving) IEEE-754 sort key; ping-pong pair scattered in step with the index buffers. */
142+ let _rxA_hi : Uint32Array = new Uint32Array ( 0 ) ;
143+ let _rxB_hi : Uint32Array = new Uint32Array ( 0 ) ;
144+ /** 256-bucket histogram (one bucket per byte value); zeroed with fill(0) at the start of every pass and never reallocated. */
145+ const _rxCnt : Uint32Array = new Uint32Array ( 256 ) ;
146+
133147// ─── SeriesOptions ────────────────────────────────────────────────────────────
134148
135149/** Constructor options accepted by `Series`. */
@@ -716,8 +730,7 @@ export class Series<T extends Scalar = Scalar> {
716730 const vals = this . _values ;
717731
718732 // Pre-partition NaN/null/undefined from finite values in one pass.
719- // fvals stores numeric values by original row index so the sort comparator
720- // can read a typed Float64Array (not a generic T[]) at index a/b.
733+ // fvals stores numeric values by original row index (sparse: fvals[origIdx]).
721734 const finBuf = new Uint32Array ( n ) ;
722735 const nanBuf = new Uint32Array ( n ) ;
723736 const fvals = new Float64Array ( n ) ;
@@ -727,41 +740,132 @@ export class Series<T extends Scalar = Scalar> {
727740 for ( let i = 0 ; i < n ; i ++ ) {
728741 const v = vals [ i ] ;
729742 if ( v === null || v === undefined || ( typeof v === "number" && Number . isNaN ( v ) ) ) {
730- nanBuf [ nanCount ++ ] = i ;
743+ nanBuf [ nanCount ] = i ;
744+ nanCount = nanCount + 1 ;
731745 } else {
732746 if ( typeof v === "number" ) {
733747 fvals [ i ] = v ;
734748 } else {
735749 allNumeric = false ;
736750 }
737- finBuf [ finCount ++ ] = i ;
751+ finBuf [ finCount ] = i ;
752+ finCount = finCount + 1 ;
738753 }
739754 }
740755
741- // Sort the finite-index slice in-place.
742- // For all-numeric data use the Float64Array subtraction comparator —
743- // monomorphic, branchless, and JIT-specialisable.
744- // For mixed/string data fall back to the generic branch comparator.
745756 const finSlice = finBuf . subarray ( 0 , finCount ) ;
746- if ( allNumeric ) {
757+
758+ if ( allNumeric && finCount > 0 ) {
759+ // ── LSD radix sort: 8 passes × 8 bits over IEEE-754 transformed keys ──
760+ // Eliminates all JS comparator callbacks (the bottleneck at n≥10k).
761+
762+ // Grow module-level ping-pong buffers if needed.
763+ if ( _rxA_idx . length < finCount ) {
764+ _rxA_idx = new Uint32Array ( finCount ) ;
765+ _rxB_idx = new Uint32Array ( finCount ) ;
766+ _rxA_lo = new Uint32Array ( finCount ) ;
767+ _rxB_lo = new Uint32Array ( finCount ) ;
768+ _rxA_hi = new Uint32Array ( finCount ) ;
769+ _rxB_hi = new Uint32Array ( finCount ) ;
770+ }
771+
772+ // fvals is a Float64Array; reinterpret its buffer as Uint32 to read raw bits.
773+ // On little-endian (x86/ARM): u32[2i] = lo 32 bits, u32[2i+1] = hi 32 bits.
774+ const fvalsU32 = new Uint32Array ( fvals . buffer ) ;
775+
776+ // Initialise ping arrays with identity indices and IEEE-754 sort keys.
777+ // Transform: sign bit clear (incl. +0) → flip only the sign bit; sign bit set (incl. -0) → flip all 64 bits.
778+ // This maps floats to an unsigned integer order that matches numeric order.
779+ for ( let i = 0 ; i < finCount ; i ++ ) {
780+ const origIdx = finSlice [ i ] ! ;
781+ _rxA_idx [ i ] = origIdx ;
782+ let lo = fvalsU32 [ origIdx * 2 ] ! ;
783+ let hi = fvalsU32 [ origIdx * 2 + 1 ] ! ;
784+ if ( hi & 0x80000000 ) {
785+ lo = ~ lo >>> 0 ;
786+ hi = ~ hi >>> 0 ;
787+ } else {
788+ hi = ( hi ^ 0x80000000 ) >>> 0 ;
789+ }
790+ _rxA_lo [ i ] = lo ;
791+ _rxA_hi [ i ] = hi ;
792+ }
793+
794+ // 8-pass LSD: passes 0–3 over lo word, passes 4–7 over hi word.
795+ let srcIdx = _rxA_idx ;
796+ let dstIdx = _rxB_idx ;
797+ let srcLo = _rxA_lo ;
798+ let dstLo = _rxB_lo ;
799+ let srcHi = _rxA_hi ;
800+ let dstHi = _rxB_hi ;
801+
802+ for ( let pass = 0 ; pass < 8 ; pass ++ ) {
803+ // Build histogram for this byte.
804+ _rxCnt . fill ( 0 ) ;
805+ const useHi = pass >= 4 ;
806+ const shift = ( pass % 4 ) * 8 ;
807+ for ( let i = 0 ; i < finCount ; i ++ ) {
808+ const word = useHi ? srcHi [ i ] ! : srcLo [ i ] ! ;
809+ const bucket = ( word >>> shift ) & 0xff ;
810+ const c = _rxCnt [ bucket ] ! ;
811+ _rxCnt [ bucket ] = c + 1 ;
812+ }
813+ // Prefix sum → scatter offsets.
814+ let total = 0 ;
815+ for ( let b = 0 ; b < 256 ; b ++ ) {
816+ const c = _rxCnt [ b ] ! ;
817+ _rxCnt [ b ] = total ;
818+ total = total + c ;
819+ }
820+ // Scatter elements into destination.
821+ for ( let i = 0 ; i < finCount ; i ++ ) {
822+ const word = useHi ? srcHi [ i ] ! : srcLo [ i ] ! ;
823+ const bucket = ( word >>> shift ) & 0xff ;
824+ const p = _rxCnt [ bucket ] ! ;
825+ _rxCnt [ bucket ] = p + 1 ;
826+ dstIdx [ p ] = srcIdx [ i ] ! ;
827+ dstLo [ p ] = srcLo [ i ] ! ;
828+ dstHi [ p ] = srcHi [ i ] ! ;
829+ }
830+ // Swap ping-pong references.
831+ const ti = srcIdx ;
832+ srcIdx = dstIdx ;
833+ dstIdx = ti ;
834+ const tl = srcLo ;
835+ srcLo = dstLo ;
836+ dstLo = tl ;
837+ const th = srcHi ;
838+ srcHi = dstHi ;
839+ dstHi = th ;
840+ }
841+
842+ // After the final swap, srcIdx refers to the last pass's output: original indices in ascending key order (8 passes is even, so this is the _rxA_idx buffer).
747843 if ( ascending ) {
748- finSlice . sort ( ( a , b ) => fvals [ a ] ! - fvals [ b ] ! ) ;
844+ for ( let i = 0 ; i < finCount ; i ++ ) {
845+ finSlice [ i ] = srcIdx [ i ] ! ;
846+ }
749847 } else {
750- finSlice . sort ( ( a , b ) => fvals [ b ] ! - fvals [ a ] ! ) ;
848+ for ( let i = 0 , j = finCount - 1 ; i < finCount ; i = i + 1 , j = j - 1 ) {
849+ finSlice [ i ] = srcIdx [ j ] ! ;
850+ }
851+ }
852+ } else if ( ! allNumeric ) {
853+ // String / mixed dtype: fall back to comparator-based sort.
854+ if ( ascending ) {
855+ finSlice . sort ( ( a , b ) => {
856+ const av = vals [ a ] as number | string | boolean ;
857+ const bv = vals [ b ] as number | string | boolean ;
858+ return av < bv ? - 1 : av > bv ? 1 : 0 ;
859+ } ) ;
860+ } else {
861+ finSlice . sort ( ( a , b ) => {
862+ const av = vals [ a ] as number | string | boolean ;
863+ const bv = vals [ b ] as number | string | boolean ;
864+ return av > bv ? - 1 : av < bv ? 1 : 0 ;
865+ } ) ;
751866 }
752- } else if ( ascending ) {
753- finSlice . sort ( ( a , b ) => {
754- const av = vals [ a ] as number | string | boolean ;
755- const bv = vals [ b ] as number | string | boolean ;
756- return av < bv ? - 1 : av > bv ? 1 : 0 ;
757- } ) ;
758- } else {
759- finSlice . sort ( ( a , b ) => {
760- const av = vals [ a ] as number | string | boolean ;
761- const bv = vals [ b ] as number | string | boolean ;
762- return av > bv ? - 1 : av < bv ? 1 : 0 ;
763- } ) ;
764867 }
868+ // else: allNumeric && finCount === 0 — nothing to sort.
765869
766870 // Build the output permutation and gather values in a single pass.
767871 const perm = new Array < number > ( n ) ;
@@ -771,23 +875,27 @@ export class Series<T extends Scalar = Scalar> {
771875 for ( let i = 0 ; i < nanCount ; i ++ ) {
772876 const idx = nanBuf [ i ] ! ;
773877 perm [ pos ] = idx ;
774- outData [ pos ++ ] = vals [ idx ] as T ;
878+ outData [ pos ] = vals [ idx ] as T ;
879+ pos = pos + 1 ;
775880 }
776881 for ( let i = 0 ; i < finCount ; i ++ ) {
777882 const idx = finSlice [ i ] ! ;
778883 perm [ pos ] = idx ;
779- outData [ pos ++ ] = vals [ idx ] as T ;
884+ outData [ pos ] = vals [ idx ] as T ;
885+ pos = pos + 1 ;
780886 }
781887 } else {
782888 for ( let i = 0 ; i < finCount ; i ++ ) {
783889 const idx = finSlice [ i ] ! ;
784890 perm [ pos ] = idx ;
785- outData [ pos ++ ] = vals [ idx ] as T ;
891+ outData [ pos ] = vals [ idx ] as T ;
892+ pos = pos + 1 ;
786893 }
787894 for ( let i = 0 ; i < nanCount ; i ++ ) {
788895 const idx = nanBuf [ i ] ! ;
789896 perm [ pos ] = idx ;
790- outData [ pos ++ ] = vals [ idx ] as T ;
897+ outData [ pos ] = vals [ idx ] as T ;
898+ pos = pos + 1 ;
791899 }
792900 }
793901
0 commit comments