@@ -453,36 +453,135 @@ pure
453453Cent udivmod (Cent c1, Cent c2, out Cent modulus)
454454{
455455 // printf("udiv c1(%llx,%llx) c2(%llx,%llx)\n", c1.lo, c1.hi, c2.lo, c2.hi);
456+ // Based on "Unsigned Doubleword Division" in Hacker's Delight
457+ import core.bitop ;
458+
459+ // Divides a 128-bit dividend by a 64-bit divisor.
460+ // The result must fit in 64 bits.
461+ static U udivmod128_64 (Cent c1, U c2, out U modulus)
462+ {
463+ // We work in base 2^^32
464+ enum base = 1UL << 32 ;
465+ enum divmask = (1UL << (Ubits / 2 )) - 1 ;
466+ enum divshift = Ubits / 2 ;
467+
468+ // Check for overflow and divide by 0
469+ if (c1.hi >= c2)
470+ {
471+ modulus = 0UL ;
472+ return ~ 0UL ;
473+ }
474+
475+ // Computes [num1 num0] / den
476+ static uint udiv96_64 (U num1, uint num0, U den)
477+ {
478+ // Extract both digits of the denominator
479+ const den1 = cast (uint )(den >> divshift);
480+ const den0 = cast (uint )(den & divmask);
481+ // Estimate ret as num1 / den1, and then correct it
482+ U ret = num1 / den1;
483+ const t2 = (num1 % den1) * base + num0;
484+ const t1 = ret * den0;
485+ if (t1 > t2)
486+ ret -= (t1 - t2 > den) ? 2 : 1 ;
487+ return cast (uint )ret;
488+ }
489+
490+ // Determine the normalization factor. We multiply c2 by this, so that its leading
491+ // digit is at least half base. In binary this means just shifting left by the number
492+ // of leading zeros, so that there's a 1 in the MSB.
493+ // We also shift number by the same amount. This cannot overflow because c1.hi < c2.
494+ const shift = (Ubits - 1 ) - bsr(c2);
495+ c2 <<= shift;
496+ U num2 = c1.hi;
497+ num2 <<= shift;
498+ num2 |= (c1.lo >> (- shift & 63 )) & (- cast (I)shift >> 63 );
499+ c1.lo <<= shift;
500+
501+ // Extract the low digits of the numerator (after normalizing)
502+ const num1 = cast (uint )(c1.lo >> divshift);
503+ const num0 = cast (uint )(c1.lo & divmask);
504+
505+ // Compute q1 = [num2 num1] / c2
506+ const q1 = udiv96_64(num2, num1, c2);
507+ // Compute the true (partial) remainder
508+ const rem = num2 * base + num1 - q1 * c2;
509+ // Compute q0 = [rem num0] / c2
510+ const q0 = udiv96_64(rem, num0, c2);
511+
512+ modulus = (rem * base + num0 - q0 * c2) >> shift;
513+ return (cast (U)q1 << divshift) | q0;
514+ }
515+
516+ // Special cases
456517 if (! tst(c2))
457518 {
458519 // Divide by zero
459520 modulus = Zero;
460521 return com (modulus);
461522 }
462-
463- // left justify c2
464- uint shifts = 1 ;
465- while (cast (I)c2.hi >= 0 )
523+ if (c1.hi == 0 && c2.hi == 0 )
524+ {
525+ // Single precision divide
526+ modulus = Cent(c1.lo % c2.lo);
527+ return Cent (c1.lo / c2.lo);
528+ }
529+ if (c1.hi == 0 )
530+ {
531+ // Numerator is smaller than the divisor
532+ modulus = c1;
533+ return Zero;
534+ }
535+ if (c2.hi == 0 )
466536 {
467- c2 = shl1(c2);
468- ++ shifts;
537+ // Divisor is a 64-bit value, so we just need one 128/64 division.
538+ // If c1 / c2 would overflow, break c1 up into two halves.
539+ const q1 = (c1.hi < c2.lo) ? 0 : (c1.hi / c2.lo);
540+ if (q1)
541+ c1.hi = c1.hi % c2.lo;
542+ U rem;
543+ const q0 = udivmod128_64(c1, c2.lo, rem);
544+ modulus = Cent(rem);
545+ return Cent (q0, q1);
469546 }
470547
471- // subtract and shift, just like 3rd grade long division
472- Cent quotient;
473- while (shifts-- )
548+ // Full cent precision division.
549+ // Here c2 >= 2^^64
550+ // We know that c2.hi != 0, so count leading zeros is OK
551+ // We have 0 <= shift <= 63
552+ const shift = (Ubits - 1 ) - bsr(c2.hi);
553+
554+ // Normalize the divisor so its MSB is 1
555+ // v1 = (c2 << shift) >> 64
556+ U v1 = shl(c2, shift).hi;
557+
558+ // To ensure no overflow.
559+ Cent u1 = shr1(c1);
560+
561+ // Get quotient from divide unsigned operation.
562+ U rem_ignored;
563+ const q1 = udivmod128_64(u1, v1, rem_ignored);
564+
565+ // Undo normalization and division of c1 by 2.
566+ Cent quotient = shr(shl(Cent(q1), shift), 63 );
567+
568+ // Make quotient correct or too small by 1
569+ if (tst(quotient))
570+ quotient = dec(quotient);
571+
572+ // Now quotient is correct.
573+ // Compute rem = c1 - (quotient * c2);
574+ Cent rem = sub(c1, mul(quotient, c2));
575+
576+ // Check if remainder is larger than the divisor
577+ if (uge(rem, c2))
474578 {
475- // printf("shifts %d c1(%llx,%llx) c2(%llx,%llx)\n", shifts, c1.lo, c1.hi, c2.lo, c2.hi);
476- quotient = shl1(quotient);
477- if (uge(c1, c2))
478- {
479- // printf("sub\n");
480- c1 = sub(c1, c2);
481- quotient.lo |= 1 ;
482- }
483- c2 = shr1(c2);
579+ // Increment quotient
580+ quotient = inc(quotient);
581+ // Subtract c2 from remainder
582+ rem = sub(rem, c2);
484583 }
485- modulus = c1 ;
584+ modulus = rem ;
486585 // printf("quotient "); print(quotient);
487586 // printf("modulus "); print(modulus);
488587 return quotient;
@@ -705,6 +804,9 @@ unittest
705804
706805 enum Cs_3 = Cent(3 , I.min);
707806
807+ const Cbig_1 = Cent(0xa3ccac1832952398 , 0xc3ac542864f652f8 );
808+ const Cbig_2 = Cent(0x5267b85f8a42fc20 , 0 );
809+
708810 /* ***********************/
709811
710812 assert ( ugt(C1 , C0 ) );
@@ -776,19 +878,29 @@ unittest
776878 assert (udivmod(C10 ,C2 , modulus) == C5 ); assert (modulus == C0 );
777879 assert (udivmod(C10 ,C3 , modulus) == C3 ); assert (modulus == C1 );
778880 assert (udivmod(C10 ,C0 , modulus) == Cm1); assert (modulus == C0 );
881+ assert (udivmod(C2 ,C90_30 , modulus) == C0 ); assert (modulus == C2 );
882+ assert (udiv(mul(C90_30 , C2 ), C2 ) == C90_30 );
883+ assert (udiv(mul(C90_30 , C2 ), C90_30 ) == C2 );
779884
780885 assert (div(C10 ,C3 ) == C3 );
781886 assert (divmod( C10 , C3 , modulus) == C3 ); assert (modulus == C1 );
782887 assert (divmod(Cm10, C3 , modulus) == Cm3); assert (modulus == Cm1);
783888 assert (divmod( C10 , Cm3, modulus) == Cm3); assert (modulus == C1 );
784889 assert (divmod(Cm10, Cm3, modulus) == C3 ); assert (modulus == Cm1);
890+ assert (divmod(C2 , C90_30 , modulus) == C0 ); assert (modulus == C2 );
891+ assert (div(mul(C90_30 , C2 ), C2 ) == C90_30 );
892+ assert (div(mul(C90_30 , C2 ), C90_30 ) == C2 );
893+
894+ assert (divmod(Cbig_1, Cbig_2, modulus) == Cent(0x4496aa309d4d4a2f , U.max));
895+ assert (modulus == Cent(0xd83203d0fdc799b8 , U.max));
896+ assert (udivmod(Cbig_1, Cbig_2, modulus) == Cent(0x5fe0e9bace2bedad , 2 ));
897+ assert (modulus == Cent(0x2c923125a68721f8 , 0 ));
785898
786899 assert (mul(Cm10, C1 ) == Cm10);
787900 assert (mul(C1 , Cm10) == Cm10);
788901 assert (mul(C9_3 , C10 ) == C90_30 );
789902 assert (mul(Cs_3, C10 ) == C30 );
790903 assert (mul(Cm10, Cm10) == C100 );
791- assert (div(mul(C90_30 , C2 ), C2 ) == C90_30 );
792904
793905 assert ( or(C4_8 , C3_1 ) == C7_9 );
794906 assert (and(C4_8 , C7_9 ) == C4_8 );
0 commit comments