77 *
88 * mode 0 – shortest round-trip string (repr / str)
99 * mode 2 – N significant digits (%e, %g)
10- * mode 3 – N digits past decimal point (%f), ndigits >= 0 only
11- *
12- * The negative-ndigits case of mode 3 (used by float.__round__ with a
13- * negative argument) is NOT handled here; callers must use _Py_dg_dtoa for
14- * that path .
10+ * mode 3 – N digits past decimal point (%f); supports ndigits >= 0 via
11+ * d2fixed_buffered_n, and ndigits < 0 (float.__round__ with a
12+ * negative argument) via a small adapter that rounds to the
13+ * nearest multiple of 10^(-ndigits) using the exact digit
14+ * expansion of floor(|d|) plus a "fractional part > 0" bit .
1515 *
1616 * Output contract (matches Gay's dtoa):
1717 * - Returns a PyMem_Malloc'd buffer containing raw decimal digits only
4040#include <stdint.h>
4141#include <string.h>
4242#include <assert.h>
43+ #include <math.h> /* fabs, floor, signbit, isnan, isinf */
4344#include "pymem.h" /* PyMem_Malloc / PyMem_Free */
4445#include "_ryu/ryu.h" /* d2s_buffered_n, d2exp_buffered_n,
4546 d2fixed_buffered_n */
4647
47- /* Maximum buffer sizes for Ryu's output:
48- * d2s : up to 25 chars (sign + 17 digits + 'E' + sign + 3-digit exp)
49- * d2exp : up to 2000 chars (for very high precision; Ryu uses 2000 in d2exp)
50- * d2fixed: up to 2000 chars
51- * We use a generous stack buffer for d2s and heap for the others.
48+ /* Maximum buffer sizes for Ryu's output.
49+ *
50+ * d2s always fits in ~25 chars (sign + 17 digits + 'E' + sign + 3-digit exp).
51+ *
52+ * d2exp and d2fixed write a size proportional to the requested precision.
53+ * Python's format code accepts arbitrarily large precisions (e.g. "%.123456f"),
54+ * so we must size the heap buffer dynamically — a fixed 2000-byte buffer
55+ * would be smashed. Worst-case output sizes (from reading the Ryu source):
56+ *
57+ * d2fixed: [sign] + up to 309 integer digits + '.' + precision + NUL
58+ * ≤ 312 + precision
59+ * d2exp : [sign] + 1 digit + '.' + precision + 'e' + sign + 4 exp + NUL
60+ * ≤ 10 + precision
61+ *
62+ * The helpers below add slack (d2fixed: precision + 384, d2exp: precision + 96)
63+ * and clamp to minimums of 512 / 128 bytes, so negative/zero precision is safe.
64+ * _PYRYU_D2FIXED_BUFSIZE is retained as a small stack buffer for the
65+ * integer-digit extraction in ryu_mode3_neg (precision=0 path).
5266 */
5367#define _PYRYU_D2S_BUFSIZE 32
54- #define _PYRYU_D2EXP_BUFSIZE 2000
55- #define _PYRYU_D2FIXED_BUFSIZE 2000
68+ #define _PYRYU_D2FIXED_BUFSIZE 512
69+ static inline size_t _pyryu_d2fixed_bufsize (int precision ) {
70+ size_t p = (precision > 0 ) ? (size_t )precision : 0 ;
71+ size_t n = p + 384 ;
72+ return n < 512 ? 512 : n ;
73+ }
74+ static inline size_t _pyryu_d2exp_bufsize (int precision ) {
75+ size_t p = (precision > 0 ) ? (size_t )precision : 0 ;
76+ size_t n = p + 96 ;
77+ return n < 128 ? 128 : n ;
78+ }
5679
5780/* -------------------------------------------------------------------------
5881 * parse_ryu_d2s_output
@@ -340,6 +363,188 @@ parse_ryu_d2fixed_output(const char *ryu_buf, int ryu_len,
340363 return 1 ;
341364}
342365
366+ /* -------------------------------------------------------------------------
367+ * ryu_mode3_neg
368+ *
369+ * Mode 3 with negative ndigits = -k (k >= 1): round |d| to the nearest
370+ * multiple of 10^k with banker's tie-to-even against the *exact* value
371+ * of d. Gay's _Py_dg_dtoa(d, 3, -k, ...) does the same thing.
372+ *
373+ * Algorithm:
374+ * 1. Extract sign and handle NaN/Inf/0.
375+ * 2. Let ix = floor(|d|). Since doubles with |d| >= 2^52 are already
376+ * integers, floor() is exact for every finite double.
377+ * 3. Call d2fixed_buffered_n(ix, 0) to obtain the exact decimal digits
378+ * of ix. (No banker-rounding happens because ix is an integer.)
379+ * 4. frac_nonzero = (|d| != ix). This is the only information from the
380+ * sub-integer part that matters for rounding at an integer-scale
381+ * position: for k >= 1, the tie between Q*10^k and (Q+1)*10^k occurs
382+ * exactly at R == 10^k/2 with f == 0.
383+ * 5. Split the integer digit string into Q (high |ix_len|-k digits) and
384+ * R (low k digits). Compare R against 10^k/2 (= "5" + (k-1) "0"s).
385+ * 6. Round:
386+ * R < 10^k/2 : keep Q
387+ * R > 10^k/2 : Q += 1
388+ * R == 10^k/2, f > 0 : Q += 1
389+ * R == 10^k/2, f == 0 : banker's (Q += 1 iff Q's last digit is odd)
390+ * 7. Output digits = decimal of Q with trailing zeros stripped,
391+ * decpt = k + len(Q_before_stripping) (so value = digits * 10^exp
392+ * with exp = decpt - len(digits) == k + stripped_zero_count,
393+ * preserving the Q * 10^k value). If Q == 0, emit "0" with decpt=1.
394+ *
395+ * Returns 1 on success, 0 on memory failure.
396+ * ------------------------------------------------------------------------- */
397+ static int
398+ ryu_mode3_neg (double d , int k ,
399+ char * * out_digits , int * decpt , int * sign , char * * digits_end )
400+ {
401+ assert (k >= 1 );
402+
403+ * sign = signbit (d ) ? 1 : 0 ;
404+
405+ /* NaN / Infinity. Emit the literal string (no sign — caller tracks it
406+ * via *sign) and decpt=9999, matching Gay's dtoa convention. */
407+ if (isnan (d ) || isinf (d )) {
408+ const char * lit = isnan (d ) ? "NaN" : "Infinity" ;
409+ size_t n = strlen (lit );
410+ char * buf = (char * )PyMem_Malloc (n + 1 );
411+ if (buf == NULL ) return 0 ;
412+ memcpy (buf , lit , n + 1 );
413+ * out_digits = buf ;
414+ * digits_end = buf + n ;
415+ * decpt = 9999 ;
416+ return 1 ;
417+ }
418+
419+ /* Zero (signed or unsigned). */
420+ if (d == 0.0 ) {
421+ char * buf = (char * )PyMem_Malloc (2 );
422+ if (buf == NULL ) return 0 ;
423+ buf [0 ] = '0' ; buf [1 ] = '\0' ;
424+ * out_digits = buf ;
425+ * digits_end = buf + 1 ;
426+ * decpt = 1 ;
427+ return 1 ;
428+ }
429+
430+ double ax = fabs (d );
431+ double ix = floor (ax );
432+ int frac_nonzero = (ax != ix );
433+
434+ /* Exact integer digits of ix. d2fixed with precision=0 on an integer
435+ * input performs no rounding: Ryu's first loop emits the exact digits
436+ * from POW10_SPLIT tables, and the fractional-rounding loop finds no
437+ * nonzero fractional digits. */
438+ char intbuf [_PYRYU_D2FIXED_BUFSIZE ];
439+ int intlen = d2fixed_buffered_n (ix , 0 , intbuf );
440+ /* ix >= 0, so no leading '-' in intbuf. */
441+
442+ /* Case: integer part has fewer digits than k. Value < 10^(k-1) since
443+ * intlen <= k-1 and d2fixed emits at least one digit. That is strictly
444+ * less than 10^k/2, so we round down to 0 regardless of fractional. */
445+ if (intlen < k ) {
446+ char * buf = (char * )PyMem_Malloc (2 );
447+ if (buf == NULL ) return 0 ;
448+ buf [0 ] = '0' ; buf [1 ] = '\0' ;
449+ * out_digits = buf ;
450+ * digits_end = buf + 1 ;
451+ * decpt = 1 ;
452+ return 1 ;
453+ }
454+
455+ int q_len = intlen - k ; /* digits of Q_before_rounding; may be 0 */
456+
457+ /* Compare R (the low k digits) against 10^k/2 ("5" + (k-1) zeros). */
458+ int cmp ;
459+ {
460+ char r_first = intbuf [q_len ];
461+ if (r_first < '5' ) {
462+ cmp = -1 ;
463+ }
464+ else if (r_first > '5' ) {
465+ cmp = 1 ;
466+ }
467+ else {
468+ cmp = 0 ;
469+ for (int i = q_len + 1 ; i < intlen ; i ++ ) {
470+ if (intbuf [i ] != '0' ) {
471+ cmp = 1 ;
472+ break ;
473+ }
474+ }
475+ }
476+ }
477+
478+ int round_up ;
479+ if (cmp < 0 ) {
480+ round_up = 0 ;
481+ }
482+ else if (cmp > 0 ) {
483+ round_up = 1 ;
484+ }
485+ else {
486+ /* R == 10^k/2 exactly */
487+ if (frac_nonzero ) {
488+ round_up = 1 ;
489+ }
490+ else {
491+ char q_last = (q_len > 0 ) ? intbuf [q_len - 1 ] : '0' ;
492+ round_up = ((q_last - '0' ) & 1 ) ? 1 : 0 ;
493+ }
494+ }
495+
496+ /* Build Q as a digit string, with room for a possible carry-out. */
497+ char * qbuf = (char * )PyMem_Malloc ((size_t )q_len + 2 );
498+ if (qbuf == NULL ) return 0 ;
499+ if (q_len == 0 ) {
500+ qbuf [0 ] = round_up ? '1' : '0' ;
501+ qbuf [1 ] = '\0' ;
502+ }
503+ else {
504+ memcpy (qbuf , intbuf , (size_t )q_len );
505+ qbuf [q_len ] = '\0' ;
506+ if (round_up ) {
507+ int i = q_len - 1 ;
508+ while (i >= 0 && qbuf [i ] == '9' ) {
509+ qbuf [i ] = '0' ;
510+ i -- ;
511+ }
512+ if (i >= 0 ) {
513+ qbuf [i ]++ ;
514+ }
515+ else {
516+ /* Carry propagated past leading digit — prepend '1'. */
517+ memmove (qbuf + 1 , qbuf , (size_t )q_len );
518+ qbuf [0 ] = '1' ;
519+ qbuf [q_len + 1 ] = '\0' ;
520+ }
521+ }
522+ }
523+ int qlen = (int )strlen (qbuf );
524+
525+ /* Special case: rounded value is 0. */
526+ if (qlen == 1 && qbuf [0 ] == '0' ) {
527+ * out_digits = qbuf ;
528+ * digits_end = qbuf + 1 ;
529+ * decpt = 1 ;
530+ return 1 ;
531+ }
532+
533+ /* value = Q * 10^k with Q's decimal digits = qbuf; decpt = k + qlen.
534+ * Strip trailing zeros from qbuf (decpt is unchanged since the
535+ * represented value is invariant under digits -> digits+"0" with
536+ * exp += 0 per our decpt formula). */
537+ * decpt = k + qlen ;
538+ while (qlen > 1 && qbuf [qlen - 1 ] == '0' ) {
539+ qlen -- ;
540+ }
541+ qbuf [qlen ] = '\0' ;
542+
543+ * out_digits = qbuf ;
544+ * digits_end = qbuf + qlen ;
545+ return 1 ;
546+ }
547+
343548/* -------------------------------------------------------------------------
344549 * _PyRyu_dtoa – main entry point
345550 * ------------------------------------------------------------------------- */
@@ -366,7 +571,7 @@ _PyRyu_dtoa(double d, int mode, int ndigits,
366571 * for a total of P+1 significant digits.
367572 * So we pass precision = ndigits - 1. */
368573 int precision = (ndigits > 0 ) ? ndigits - 1 : 0 ;
369- char * buf = (char * )PyMem_Malloc (_PYRYU_D2EXP_BUFSIZE );
574+ char * buf = (char * )PyMem_Malloc (_pyryu_d2exp_bufsize ( precision ) );
370575 if (buf == NULL )
371576 return NULL ;
372577 int len = d2exp_buffered_n (d , (uint32_t )precision , buf );
@@ -379,9 +584,15 @@ _PyRyu_dtoa(double d, int mode, int ndigits,
379584 }
380585 case 3 : {
381586 /* ndigits digits after the decimal point (fixed-point format).
382- * ndigits must be >= 0 here (negative case uses _Py_dg_dtoa). */
383- assert (ndigits >= 0 );
384- char * buf = (char * )PyMem_Malloc (_PYRYU_D2FIXED_BUFSIZE );
587+ * ndigits < 0 means round to the nearest multiple of 10^(-ndigits),
588+ * used by float.__round__ with a negative argument. */
589+ if (ndigits < 0 ) {
590+ if (!ryu_mode3_neg (d , - ndigits , & out_digits , decpt , sign ,
591+ digits_end ))
592+ return NULL ;
593+ break ;
594+ }
595+ char * buf = (char * )PyMem_Malloc (_pyryu_d2fixed_bufsize (ndigits ));
385596 if (buf == NULL )
386597 return NULL ;
387598 int len = d2fixed_buffered_n (d , (uint32_t )ndigits , buf );
0 commit comments