@@ -382,7 +382,67 @@ public static long[] divide128by64Unsigned(long u1, long u0, long v) {
382382
383383 return new long [] { qHigh , qLow , finalRemainder };
384384 }
385-
385+
386+ /**
387+ * Truncated unsigned 128 / 64 bit division which returns only the low part of the quotient,
388+ * derived from <code>divide128by64Unsigned(long u1, long u0, long v)</code>.
389+ *
390+ * In the cases where it is useful, this implementation is a little bit faster because it does not allocate the result array.
391+ *
392+ * @param u1 high 64 bits of the dividend
393+ * @param u0 low 64 bits of the dividend
394+ * @param v divisor
395+ * @return the lower 64 bit of the quotient
396+ */
397+ public static long truncatedDivide128by64Unsigned (long u1 , long u0 , long v ) {
398+ if (v == 0 ) throw new ArithmeticException ("Division by zero" );
399+
400+ // Step 1: check for quotients > 64 bit
401+ if (Long .compareUnsigned (u1 , v ) >= 0 ) {
402+ u1 = Long .remainderUnsigned (u1 , v );
403+ }
404+
405+ // Step 2: highly optimized 64/64 division for the remaining part
406+ // (the dividend now is u1:u0, which guarantees u1 < v)
407+
408+ // 2.1. Normalization
409+ int s = Long .numberOfLeadingZeros (v );
410+ long v_norm = v << s ;
411+ long v_hi = v_norm >>> 32 ;
412+ long v_lo = v_norm & 0xFFFFFFFFL ;
413+
414+ // shift u1:u0 (careful with s=0)
415+ long u_hi = (s == 0 ) ? u1 : (u1 << s ) | (u0 >>> (64 - s ));
416+ long u_lo = u0 << s ;
417+
418+ // 2.2. first 32-bit half of the quotient (q1)
419+ long q1 = Long .divideUnsigned (u_hi , v_hi );
420+ long rhat = Long .remainderUnsigned (u_hi , v_hi );
421+
422+ while (Long .compareUnsigned (q1 , 0x100000000L ) >= 0 || Long .compareUnsigned (q1 * v_lo , (rhat << 32 ) | (u_lo >>> 32 )) > 0 ) {
423+ q1 --;
424+ rhat += v_hi ;
425+ if (Long .compareUnsigned (rhat , 0x100000000L ) >= 0 ) break ;
426+ }
427+
428+ // intermediate remainder
429+ long rem = ((u_hi << 32 ) | (u_lo >>> 32 )) - (q1 * v_norm );
430+
431+ // 2.3. Second 32-bit half of the quotient (q0)
432+ long q0 = Long .divideUnsigned (rem , v_hi );
433+ rhat = Long .remainderUnsigned (rem , v_hi );
434+
435+ while (Long .compareUnsigned (q0 , 0x100000000L ) >= 0 || Long .compareUnsigned (q0 * v_lo , (rhat << 32 ) | (u_lo & 0xFFFFFFFFL )) > 0 ) {
436+ q0 --;
437+ rhat += v_hi ;
438+ if (Long .compareUnsigned (rhat , 0x100000000L ) >= 0 ) break ;
439+ }
440+
441+ // 2.4. Assemble results
442+ long qLow = (q1 << 32 ) | q0 ;
443+ return qLow ;
444+ }
445+
386446 /**
387447 * Computes the remainder of an unsigned 128 % 64 bit division.
388448 * This implementation has been worked out with support from Google Gemini.
0 commit comments