@@ -61,7 +61,7 @@ public static class PredicatesAdaptive
6161 /// zero if collinear, or a negative value if to the right.
6262 /// </returns>
6363 /// <seealso cref="PredicatesExact.Orient2d(double, double, double, double, double, double)"/>
64- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
64+ [ MethodImpl ( MethodImplOptions . AggressiveInlining | MethodImplOptions . AggressiveOptimization ) ] // opt-15: aggressive JIT opt for fast-path Stage A
6565 [ SkipLocalsInit ]
6666 public static double Orient2d (
6767 double ax , double ay , double bx , double by , double cx , double cy )
@@ -195,7 +195,7 @@ public static float Orient2d(
195195 /// zero if on, or a negative value if outside.
196196 /// </returns>
197197 /// <seealso cref="PredicatesExact.InCircle(double, double, double, double, double, double, double, double)"/>
198- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
198+ [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ] // opt-16: keep only AggressiveInlining (AggressiveOptimization inflated the Stage-B frame and hurt Stage-A)
199199 [ SkipLocalsInit ]
200200 public static double InCircle (
201201 double ax , double ay , double bx , double by ,
@@ -390,6 +390,7 @@ internal static double MultTail(double a, double b, double p)
390390 /// Matches Lenthe <c>ExpansionBase::TwoTwoDiff</c>.
391391 /// </summary>
392392 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
393+ [ SkipLocalsInit ] // opt-11: x0..x3 are all unconditionally computed before conditional write
393394 internal static int TwoTwoDiff ( double ax , double by , double ay , double bx , Span < double > h )
394395 {
395396 double axby1 = ax * by ;
@@ -419,6 +420,8 @@ internal static int TwoTwoDiff(double ax, double by, double ay, double bx, Span<
419420 /// Matches Lenthe <c>ExpansionBase::ScaleExpansion</c>.
420421 /// Output has up to <c>2*elen</c> terms.
421422 /// </summary>
423+ [ MethodImpl ( MethodImplOptions . AggressiveInlining | MethodImplOptions . AggressiveOptimization ) ] // opt-7, opt-18
424+ [ SkipLocalsInit ] // opt-8: locals (hIdx, Q, hh, Ti, ti, Qi) are all written before read
422425 internal static int ScaleExpansion ( Span < double > e , int elen , double b , Span < double > h )
423426 {
424427 if ( elen == 0 || b == 0.0 )
@@ -427,24 +430,30 @@ internal static int ScaleExpansion(Span<double> e, int elen, double b, Span<doub
427430 }
428431
429432 var ( bHi , bLo ) = Split ( b ) ;
430- double Q = e [ 0 ] * b ;
431- double hh = DekkersPresplit ( e [ 0 ] , bHi , bLo , Q ) ;
433+
434+ // opt-9: bounds-check-free loop via ref locals
435+ ref double eRef = ref MemoryMarshal . GetReference ( e ) ;
436+ ref double hRef = ref MemoryMarshal . GetReference ( h ) ;
437+
438+ double Q = Unsafe . Add ( ref eRef , 0 ) * b ;
439+ double hh = DekkersPresplit ( Unsafe . Add ( ref eRef , 0 ) , bHi , bLo , Q ) ;
432440 int hIdx = 0 ;
433- if ( hh != 0.0 ) { h [ hIdx ++ ] = hh ; }
441+ if ( hh != 0.0 ) { Unsafe . Add ( ref hRef , hIdx ++ ) = hh; }
434442
435443 for ( int i = 1 ; i < elen ; i ++ )
436444 {
437- double Ti = e [ i ] * b ;
438- double ti = DekkersPresplit ( e [ i ] , bHi , bLo , Ti ) ;
445+ double ei = Unsafe . Add ( ref eRef , i ) ;
446+ double Ti = ei * b ;
447+ double ti = DekkersPresplit ( ei , bHi , bLo , Ti ) ;
439448 double Qi = Q + ti ;
440449 hh = PlusTail ( Q , ti , Qi ) ;
441- if ( hh != 0.0 ) { h [ hIdx ++ ] = hh ; }
450+ if ( hh != 0.0 ) { Unsafe . Add ( ref hRef , hIdx ++ ) = hh; }
442451 Q = Ti + Qi ;
443452 hh = FastPlusTail ( Ti , Qi , Q ) ;
444- if ( hh != 0.0 ) { h [ hIdx ++ ] = hh ; }
453+ if ( hh != 0.0 ) { Unsafe . Add ( ref hRef , hIdx ++ ) = hh; }
445454 }
446455
447- if ( Q != 0.0 ) { h [ hIdx ++ ] = Q ; }
456+ if ( Q != 0.0 ) { Unsafe . Add ( ref hRef , hIdx ++ ) = Q; }
448457 return hIdx ;
449458 }
450459
@@ -465,7 +474,7 @@ internal static double DekkersPresplit(double a, double bHi, double bLo, double
465474 /// Computes <c>e*s*s + e*t*t</c> as an expansion (two ScaleExpansion calls each, then sum).
466475 /// Max output: 32 terms for 4-term input (used for InCircle Stage B lift terms).
467476 /// </summary>
468- [ SkipLocalsInit ]
477+ [ SkipLocalsInit ] // SkipLocalsInit only — deliberately NOT marked AggressiveInlining (inlining 3× into AdaptiveInCircle bloats the Stage-A JIT frame)
469478 internal static int ScaleExpansionSum ( Span < double > e , int elen , double s , double t , Span < double > h )
470479 {
471480 Span < double > es = stackalloc double [ 8 ] ;
@@ -485,52 +494,121 @@ internal static int ScaleExpansionSum(Span<double> e, int elen, double s, double
485494 /// Merge-then-accumulate two expansions. Matches Lenthe <c>ExpansionBase::ExpansionSum</c>:
486495 /// std::merge by |value| (stable), then sequential grow-expansion accumulation.
487496 /// </summary>
497+ [ MethodImpl ( MethodImplOptions . AggressiveOptimization ) ] // opt-20: aggressive JIT optimization for this hot method
488498 [ SkipLocalsInit ]
489499 internal static int ExpansionSum ( Span < double > e , int elen , Span < double > f , int flen , Span < double > h )
490500 {
491501 if ( elen == 0 && flen == 0 ) { return 0 ; }
492- if ( elen == 0 ) { f [ ..flen ] . CopyTo ( h ) ; return flen ; }
493- if ( flen == 0 ) { e [ ..elen ] . CopyTo ( h ) ; return elen ; }
502+ if ( elen == 0 )
503+ {
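504+ // opt-5: Unsafe.CopyBlockUnaligned replaces Span.CopyTo (intended to eliminate Memmove call overhead). Unlike f[..flen].CopyTo(h), this copy is not bounds-checked: the caller must guarantee flen <= f.Length and flen <= h.Length.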
505+ Unsafe . CopyBlockUnaligned (
506+ ref Unsafe . As < double , byte > ( ref MemoryMarshal . GetReference ( h ) ) ,
507+ ref Unsafe . As < double , byte > ( ref MemoryMarshal . GetReference ( f ) ) ,
508+ ( uint ) ( flen * sizeof ( double ) ) ) ;
509+ return flen ;
510+ }
511+ if ( flen == 0 )
512+ {
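513+ // opt-5: same unchecked block copy for the flen==0 fast path (e is copied into h; the caller must guarantee elen <= e.Length and elen <= h.Length)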
514+ Unsafe . CopyBlockUnaligned (
515+ ref Unsafe . As < double , byte > ( ref MemoryMarshal . GetReference ( h ) ) ,
516+ ref Unsafe . As < double , byte > ( ref MemoryMarshal . GetReference ( e ) ) ,
517+ ( uint ) ( elen * sizeof ( double ) ) ) ;
518+ return elen ;
519+ }
494520
495521 int total = elen + flen ;
496522
497- // Merge sorted by |value| into temporary buffer.
498- // Maximum merged size for InCircle Stage D is 192+192=384 ≤ 400, so
499- // the stackalloc path is always taken for that call site.
500- Span < double > merged = total <= 400 ? stackalloc double [ 400 ] : new double [ total ] ;
523+ // opt-1: Tiered stackalloc — allocate only as much as the actual input size requires.
524+ // Each tier declares its scratch Span inside its own branch and hands it directly to
525+ // ExpansionSumCore, so no stack-allocated Span is ever assigned to a variable declared
526+ // outside that branch. Totals above 400 fall back to a heap-allocated array.
527+ if ( total <= 16 )
528+ {
529+ Span < double > merged16 = stackalloc double [ 16 ] ;
530+ return ExpansionSumCore ( e , elen , f , flen , h , merged16 ) ;
531+ }
532+ if ( total <= 64 )
533+ {
534+ Span < double > merged64 = stackalloc double [ 64 ] ;
535+ return ExpansionSumCore ( e , elen , f , flen , h , merged64 ) ;
536+ }
537+ if ( total <= 400 )
538+ {
539+ Span < double > merged400 = stackalloc double [ 400 ] ;
540+ return ExpansionSumCore ( e , elen , f , flen , h , merged400 ) ;
541+ }
542+ return ExpansionSumCore ( e , elen , f , flen , h , new double [ total ] ) ;
543+ }
544+
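545+ // opt-2, opt-3, opt-4: Core merge+accumulate logic — receives a pre-sized scratch buffer.
546+ // All span accesses use MemoryMarshal.GetReference + Unsafe.Add to eliminate bounds checks, so nothing is range-checked here: merged must hold at least elen + flen elements, and elen + flen must be at least 2 because merged[0] and merged[1] are read unconditionally.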
547+ [ MethodImpl ( MethodImplOptions . AggressiveInlining | MethodImplOptions . AggressiveOptimization ) ]
548+ private static int ExpansionSumCore (
549+ Span < double > e , int elen , Span < double > f , int flen , Span < double > h , Span < double > merged )
550+ {
551+ ref double eRef = ref MemoryMarshal . GetReference ( e ) ;
552+ ref double fRef = ref MemoryMarshal . GetReference ( f ) ;
553+ ref double mRef = ref MemoryMarshal . GetReference ( merged ) ;
554+
501555 int ei = 0 , fi = 0 , mi = 0 ;
502556 while ( ei < elen && fi < flen )
503557 {
504- if ( Math . Abs ( f [ fi ] ) < Math . Abs ( e [ ei ] ) )
558+ double eVal = Unsafe . Add ( ref eRef , ei ) ;
559+ double fVal = Unsafe . Add ( ref fRef , fi ) ;
560+ if ( Math . Abs ( fVal ) < Math . Abs ( eVal ) )
505561 {
506- merged [ mi ++ ] = f [ fi ++ ] ;
562+ Unsafe . Add ( ref mRef , mi ++ ) = fVal;
563+ fi ++ ;
507564 }
508565 else
509566 {
510- merged [ mi ++ ] = e [ ei ++ ] ;
567+ Unsafe . Add ( ref mRef , mi ++ ) = eVal;
568+ ei ++ ;
511569 }
512570 }
513571
514- while ( ei < elen ) { merged [ mi ++ ] = e [ ei ++ ] ; }
515- while ( fi < flen ) { merged [ mi ++ ] = f [ fi ++ ] ; }
572+ // opt-4: tail copy loops → Unsafe.CopyBlockUnaligned
573+ if ( ei < elen )
574+ {
575+ int rem = elen - ei ;
576+ Unsafe . CopyBlockUnaligned (
577+ ref Unsafe . As < double , byte > ( ref Unsafe . Add ( ref mRef , mi ) ) ,
578+ ref Unsafe . As < double , byte > ( ref Unsafe . Add ( ref eRef , ei ) ) ,
579+ ( uint ) ( rem * sizeof ( double ) ) ) ;
580+ mi += rem ;
581+ }
582+ if ( fi < flen )
583+ {
584+ int rem = flen - fi ;
585+ Unsafe . CopyBlockUnaligned (
586+ ref Unsafe . As < double , byte > ( ref Unsafe . Add ( ref mRef , mi ) ) ,
587+ ref Unsafe . As < double , byte > ( ref Unsafe . Add ( ref fRef , fi ) ) ,
588+ ( uint ) ( rem * sizeof ( double ) ) ) ;
589+ mi += rem ;
590+ }
516591
517- // Sequential accumulation
592+ // opt-3: bounds-check-free accumulation loop using ref locals
593+ ref double hRef = ref MemoryMarshal . GetReference ( h ) ;
518594 int hIdx = 0 ;
519- double Q = merged [ 0 ] ;
520- double Qnew = merged [ 1 ] + Q ;
521- double hh = FastPlusTail ( merged [ 1 ] , Q , Qnew ) ;
595+ double Q = Unsafe . Add ( ref mRef , 0 ) ;
596+ double m1 = Unsafe . Add ( ref mRef , 1 ) ;
597+ double Qnew = m1 + Q ;
598+ double hh = FastPlusTail ( m1 , Q , Qnew ) ;
522599 Q = Qnew ;
523- if ( hh != 0.0 ) { h [ hIdx ++ ] = hh ; }
600+ if ( hh != 0.0 ) { Unsafe . Add ( ref hRef , hIdx ++ ) = hh; }
524601
525602 for ( int g = 2 ; g < mi ; g ++ )
526603 {
527- Qnew = Q + merged [ g ] ;
528- hh = PlusTail ( Q , merged [ g ] , Qnew ) ;
604+ double mg = Unsafe . Add ( ref mRef , g ) ;
605+ Qnew = Q + mg ;
606+ hh = PlusTail ( Q , mg , Qnew ) ;
529607 Q = Qnew ;
530- if ( hh != 0.0 ) { h [ hIdx ++ ] = hh ; }
608+ if ( hh != 0.0 ) { Unsafe . Add ( ref hRef , hIdx ++ ) = hh; }
531609 }
532610
533- if ( Q != 0.0 ) { h [ hIdx ++ ] = Q ; }
611+ if ( Q != 0.0 ) { Unsafe . Add ( ref hRef , hIdx ++ ) = Q; }
534612 return hIdx ;
535613 }
536614
@@ -545,16 +623,19 @@ internal static double Estimate(Span<double> e, int elen)
545623 for ( ; i <= elen - 4 ; i += 4 )
546624 acc = Vector256 . Add ( acc , Vector256 . LoadUnsafe ( ref eRef , ( nuint ) i ) ) ;
547625 double sum = Vector256 . Sum ( acc ) ;
626+ // opt-13: bounds-check-free scalar tail using Unsafe.Add
548627 for ( ; i < elen ; i ++ ) sum += Unsafe . Add ( ref eRef , i ) ;
549628 return sum ;
550629 }
551630
631+ // opt-13: bounds-check-free scalar loop
632+ ref double sRef = ref MemoryMarshal . GetReference ( e ) ;
552633 double s = 0.0 ;
553- for ( int i = 0 ; i < elen ; i ++ ) { s += e [ i ] ; }
634+ for ( int i = 0 ; i < elen ; i ++ ) { s += Unsafe . Add ( ref sRef , i ) ; }
554635 return s ;
555636 }
556637
557- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
638+ [ MethodImpl ( MethodImplOptions . AggressiveInlining | MethodImplOptions . AggressiveOptimization ) ] // opt-12
558639 internal static double MostSignificant ( Span < double > e , int elen )
559640 {
560641 for ( int i = elen - 1 ; i >= 0 ; i -- )
@@ -564,7 +645,7 @@ internal static double MostSignificant(Span<double> e, int elen)
564645 return 0.0 ;
565646 }
566647
567- [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
648+ [ MethodImpl ( MethodImplOptions . AggressiveInlining | MethodImplOptions . AggressiveOptimization ) ] // opt-14
568649 internal static void NegateInto ( Span < double > src , int len , Span < double > dst )
569650 {
570651 if ( Vector256 . IsHardwareAccelerated && len >= 4 )
0 commit comments