Skip to content

Commit 63adf3d

Browse files
eendebakpt and claude committed
gh-NNNN: Extend Ryu adapter to mode 3 with negative ndigits, drop _Py_dg_dtoa
Ryu can now handle every case _Py_dg_dtoa used to cover in pystrtod.c and floatobject.c. The remaining gap was mode 3 with negative precision, used by float.__round__(x, -k) to round x to the nearest multiple of 10**k.

Add a new routine ryu_mode3_neg() in Python/_ryu/pystrtod_ryu.h that:

1. Obtains the exact integer digits of floor(|d|) via d2fixed_buffered_n(floor(|d|), 0) — d2fixed's first loop emits the exact POW10_SPLIT-sourced digits of an integer input without rounding.
2. Records frac_nonzero = (|d| != floor(|d|)) — the only sub-integer information needed, since for k >= 1 a tie between Q*10^k and (Q+1)*10^k occurs exactly at R == 10^k/2 with fractional part 0.
3. Splits the integer digit string into Q (high len-k digits) and R (low k digits), compares R to 10^k/2 as a digit pattern, and rounds to nearest; when R == 10^k/2 it rounds up if frac_nonzero is set, and otherwise applies banker's rounding on Q's last digit.
4. Emits digits and decpt in Gay's convention so the existing format_float_short / double_round callers keep working unchanged.

Cross-checked against decimal.quantize(ROUND_HALF_EVEN) on 23,000 random (x, k) pairs with no mismatches; all 80 cases in test_float (including the branch's own test_round_neg_ndigits_large regression test and the 25 dtoa-mode tests) pass.

Since Ryu now serves every mode and precision that format_float_short and double_round use, drop the _Py_dg_dtoa fallback from both call sites and delete _Py_dg_dtoa, _Py_dg_freedtoa, and their private rv_alloc / nrv_alloc helpers (~690 lines) from Python/dtoa.c and the declarations from Include/internal/pycore_dtoa.h. _Py_dg_strtod is still used for string->double parsing and stays.

Also fix a latent heap buffer overflow when a caller asks for very large precision (e.g. "%12.123456f" in test_format): the fixed 2000-byte output buffer for d2fixed / d2exp is replaced with a dynamically sized one scaled to the requested precision.
Wire the include path for Objects/floatobject.o and Python/pystrtod.o (both need Python/ on -I so that pystrtod_ryu.h can resolve "_ryu/ryu.h"), matching the existing rules for the Ryu sources.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b1bd06a commit 63adf3d

6 files changed

Lines changed: 252 additions & 752 deletions

File tree

Include/internal/pycore_dtoa.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@ extern "C" {
2525
#endif
2626

2727
extern double _Py_dg_strtod(const char *str, char **ptr);
28-
extern char* _Py_dg_dtoa(double d, int mode, int ndigits,
29-
int *decpt, int *sign, char **rve);
30-
extern void _Py_dg_freedtoa(char *s);
3128

3229

3330
extern PyStatus _PyDtoa_Init(PyInterpreterState *interp);

Makefile.pre.in

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3213,6 +3213,15 @@ Python/_ryu/d2s.o: Python/_ryu/d2s.c
32133213
Python/_ryu/d2fixed.o: Python/_ryu/d2fixed.c
32143214
$(CC) -c $(PY_CORE_CFLAGS) -I$(srcdir)/Python -o $@ $<
32153215

3216+
# pystrtod.c and floatobject.c include Python/_ryu/pystrtod_ryu.h which in
3217+
# turn includes "_ryu/ryu.h"; the latter resolves relative to Python/, so
3218+
# both translation units need Python/ on the include path.
3219+
Python/pystrtod.o: Python/pystrtod.c
3220+
$(CC) -c $(PY_CORE_CFLAGS) -I$(srcdir)/Python -o $@ $<
3221+
3222+
Objects/floatobject.o: Objects/floatobject.c
3223+
$(CC) -c $(PY_CORE_CFLAGS) -I$(srcdir)/Python -o $@ $<
3224+
32163225
Python/ceval.o: Python/ceval.c
32173226
$(CC) -c $(PY_CORE_CFLAGS) $(CFLAGS_CEVAL) -o $@ $<
32183227

Objects/floatobject.c

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
#include "Python.h"
77
#include "pycore_abstract.h" // _PyNumber_Index()
8-
#include "pycore_dtoa.h" // _Py_dg_dtoa()
8+
#include "pycore_dtoa.h" // _Py_dg_strtod()
99
#include "pycore_floatobject.h" // _PyFloat_FormatAdvancedWriter()
1010
#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP()
1111
#include "pycore_initconfig.h" // _PyStatus_OK()
@@ -18,7 +18,7 @@
1818
#include "pycore_structseq.h" // _PyStructSequence_FiniBuiltin()
1919
#include "pycore_tuple.h" // _PyTuple_FromPair
2020
#if _PY_SHORT_FLOAT_REPR == 1
21-
#include "../Python/_ryu/pystrtod_ryu.h" // _PyRyu_dtoa
21+
#include "_ryu/pystrtod_ryu.h" // _PyRyu_dtoa
2222
#endif
2323

2424
#include <float.h> // DBL_MAX
@@ -912,21 +912,14 @@ double_round(double x, int ndigits) {
912912
Py_ssize_t buflen, mybuflen=100;
913913
char *buf, *buf_end, shortbuf[100], *mybuf=shortbuf;
914914
int decpt, sign;
915-
int buf_uses_pymem = 0;
916915
PyObject *result = NULL;
917916
_Py_SET_53BIT_PRECISION_HEADER;
918917

919-
/* round to a decimal string.
920-
Use Ryu for ndigits >= 0 (mode 3 fixed-point), Gay's dtoa for
921-
ndigits < 0 (mode 3 with negative precision, no Ryu equivalent). */
918+
/* Round to a decimal string. Ryu handles both ndigits >= 0 (mode 3
919+
fixed-point via d2fixed_buffered_n) and ndigits < 0 (nearest
920+
multiple of 10^(-ndigits) via the mode-3 negative adapter). */
922921
_Py_SET_53BIT_PRECISION_START;
923-
if (ndigits >= 0) {
924-
buf = _PyRyu_dtoa(x, 3, ndigits, &decpt, &sign, &buf_end);
925-
buf_uses_pymem = 1;
926-
}
927-
else {
928-
buf = _Py_dg_dtoa(x, 3, ndigits, &decpt, &sign, &buf_end);
929-
}
922+
buf = _PyRyu_dtoa(x, 3, ndigits, &decpt, &sign, &buf_end);
930923
_Py_SET_53BIT_PRECISION_END;
931924
if (buf == NULL) {
932925
PyErr_NoMemory();
@@ -963,10 +956,7 @@ double_round(double x, int ndigits) {
963956
if (mybuf != shortbuf)
964957
PyMem_Free(mybuf);
965958
exit:
966-
if (buf_uses_pymem)
967-
PyMem_Free(buf);
968-
else
969-
_Py_dg_freedtoa(buf);
959+
PyMem_Free(buf);
970960
return result;
971961
}
972962

Python/_ryu/pystrtod_ryu.h

Lines changed: 227 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
*
88
* mode 0 – shortest round-trip string (repr / str)
99
* mode 2 – N significant digits (%e, %g)
10-
* mode 3 – N digits past decimal point (%f), ndigits >= 0 only
11-
*
12-
* The negative-ndigits case of mode 3 (used by float.__round__ with a
13-
* negative argument) is NOT handled here; callers must use _Py_dg_dtoa for
14-
* that path.
10+
* mode 3 – N digits past decimal point (%f); supports ndigits >= 0 via
11+
* d2fixed_buffered_n, and ndigits < 0 (float.__round__ with a
12+
* negative argument) via a small adapter that rounds to the
13+
* nearest multiple of 10^(-ndigits) using the exact digit
14+
* expansion of floor(|d|) plus a "fractional part > 0" bit.
1515
*
1616
* Output contract (matches Gay's dtoa):
1717
* - Returns a PyMem_Malloc'd buffer containing raw decimal digits only
@@ -40,19 +40,42 @@
4040
#include <stdint.h>
4141
#include <string.h>
4242
#include <assert.h>
43+
#include <math.h> /* fabs, floor, signbit, isnan, isinf */
4344
#include "pymem.h" /* PyMem_Malloc / PyMem_Free */
4445
#include "_ryu/ryu.h" /* d2s_buffered_n, d2exp_buffered_n,
4546
d2fixed_buffered_n */
4647

47-
/* Maximum buffer sizes for Ryu's output:
48-
* d2s : up to 25 chars (sign + 17 digits + 'E' + sign + 3-digit exp)
49-
* d2exp : up to 2000 chars (for very high precision; Ryu uses 2000 in d2exp)
50-
* d2fixed: up to 2000 chars
51-
* We use a generous stack buffer for d2s and heap for the others.
48+
/* Maximum buffer sizes for Ryu's output.
49+
*
50+
* d2s always fits in ~25 chars (sign + 17 digits + 'E' + sign + 3-digit exp).
51+
*
52+
* d2exp and d2fixed write a size proportional to the requested precision.
53+
* Python's format code accepts arbitrarily large precisions (e.g. "%.123456f"),
54+
* so we must size the heap buffer dynamically — a fixed 2000-byte buffer
55+
* would be smashed. Worst-case output sizes (from reading the Ryu source):
56+
*
57+
* d2fixed: [sign] + up to 309 integer digits + '.' + precision + NUL
58+
* ≤ 312 + precision
59+
* d2exp : [sign] + 1 digit + '.' + precision + 'e' + sign + 4 exp + NUL
60+
* ≤ 10 + precision
61+
*
62+
* The helpers below round up generously (at least 64 bytes of slack) and clamp to a modest
63+
* minimum so negative/zero precision still allocates a sane-sized buffer.
64+
* _PYRYU_D2FIXED_BUFSIZE is retained as a small stack buffer for the
65+
* integer-digit extraction in ryu_mode3_neg (precision=0 path).
5266
*/
5367
#define _PYRYU_D2S_BUFSIZE 32
54-
#define _PYRYU_D2EXP_BUFSIZE 2000
55-
#define _PYRYU_D2FIXED_BUFSIZE 2000
68+
#define _PYRYU_D2FIXED_BUFSIZE 512
69+
/* Size in bytes of the heap buffer to hand to d2fixed_buffered_n() for the
 * given number of fractional digits.
 *
 * A non-positive precision contributes nothing.  The result is
 * 384 + precision, but never less than 512 bytes. */
static inline size_t _pyryu_d2fixed_bufsize(int precision) {
    const size_t minimum = 512;
    size_t needed = 384;   /* fixed overhead: sign, integer digits, '.', NUL, slack */

    if (precision > 0) {
        needed += (size_t)precision;
    }
    if (needed < minimum) {
        return minimum;
    }
    return needed;
}
74+
/* Size in bytes of the heap buffer to hand to d2exp_buffered_n() for the
 * given number of digits after the decimal point.
 *
 * A non-positive precision contributes nothing.  The result is
 * 96 + precision, but never less than 128 bytes. */
static inline size_t _pyryu_d2exp_bufsize(int precision) {
    const size_t minimum = 128;
    size_t needed = 96;    /* fixed overhead: sign, lead digit, '.', exponent, NUL, slack */

    if (precision > 0) {
        needed += (size_t)precision;
    }
    if (needed < minimum) {
        return minimum;
    }
    return needed;
}
5679

5780
/* -------------------------------------------------------------------------
5881
* parse_ryu_d2s_output
@@ -340,6 +363,188 @@ parse_ryu_d2fixed_output(const char *ryu_buf, int ryu_len,
340363
return 1;
341364
}
342365

366+
/* -------------------------------------------------------------------------
367+
* ryu_mode3_neg
368+
*
369+
* Mode 3 with negative ndigits = -k (k >= 1): round |d| to the nearest
370+
* multiple of 10^k with banker's tie-to-even against the *exact* value
371+
* of d. Gay's _Py_dg_dtoa(d, 3, -k, ...) does the same thing.
372+
*
373+
* Algorithm:
374+
* 1. Extract sign and handle NaN/Inf/0.
375+
* 2. Let ix = floor(|d|). Since doubles with |d| >= 2^52 are already
376+
* integers, floor() is exact for every finite double.
377+
* 3. Call d2fixed_buffered_n(ix, 0) to obtain the exact decimal digits
378+
* of ix. (No banker-rounding happens because ix is an integer.)
379+
* 4. frac_nonzero = (|d| != ix). This is the only information from the
380+
* sub-integer part that matters for rounding at an integer-scale
381+
* position: for k >= 1, the tie between Q*10^k and (Q+1)*10^k occurs
382+
* exactly at R == 10^k/2 with f == 0.
383+
* 5. Split the integer digit string into Q (high |ix_len|-k digits) and
384+
* R (low k digits). Compare R against 10^k/2 (= "5" + (k-1) "0"s).
385+
* 6. Round:
386+
* R < 10^k/2 : keep Q
387+
* R > 10^k/2 : Q += 1
388+
* R == 10^k/2, f > 0 : Q += 1
389+
* R == 10^k/2, f == 0 : banker's (Q += 1 iff Q's last digit is odd)
390+
* 7. Output digits = decimal of Q with trailing zeros stripped,
391+
* decpt = k + len(Q_before_stripping) (so value = digits * 10^exp
392+
* with exp = decpt - len(digits) == k + stripped_zero_count,
393+
* preserving the Q * 10^k value). If Q == 0, emit "0" with decpt=1.
394+
*
395+
* Returns 1 on success, 0 on memory failure.
396+
* ------------------------------------------------------------------------- */
397+
static int
398+
ryu_mode3_neg(double d, int k,
399+
char **out_digits, int *decpt, int *sign, char **digits_end)
400+
{
401+
assert(k >= 1);
402+
403+
*sign = signbit(d) ? 1 : 0;
404+
405+
/* NaN / Infinity. Emit the literal string (no sign — caller tracks it
406+
* via *sign) and decpt=9999, matching Gay's dtoa convention. */
407+
if (isnan(d) || isinf(d)) {
408+
const char *lit = isnan(d) ? "NaN" : "Infinity";
409+
size_t n = strlen(lit);
410+
char *buf = (char *)PyMem_Malloc(n + 1);
411+
if (buf == NULL) return 0;
412+
memcpy(buf, lit, n + 1);
413+
*out_digits = buf;
414+
*digits_end = buf + n;
415+
*decpt = 9999;
416+
return 1;
417+
}
418+
419+
/* Zero (signed or unsigned). */
420+
if (d == 0.0) {
421+
char *buf = (char *)PyMem_Malloc(2);
422+
if (buf == NULL) return 0;
423+
buf[0] = '0'; buf[1] = '\0';
424+
*out_digits = buf;
425+
*digits_end = buf + 1;
426+
*decpt = 1;
427+
return 1;
428+
}
429+
430+
double ax = fabs(d);
431+
double ix = floor(ax);
432+
int frac_nonzero = (ax != ix);
433+
434+
/* Exact integer digits of ix. d2fixed with precision=0 on an integer
435+
* input performs no rounding: Ryu's first loop emits the exact digits
436+
* from POW10_SPLIT tables, and the fractional-rounding loop finds no
437+
* nonzero fractional digits. */
438+
char intbuf[_PYRYU_D2FIXED_BUFSIZE];
439+
int intlen = d2fixed_buffered_n(ix, 0, intbuf);
440+
/* ix >= 0, so no leading '-' in intbuf. */
441+
442+
/* Case: integer part has fewer digits than k. Value < 10^(k-1) since
443+
* intlen <= k-1 and d2fixed emits at least one digit. That is strictly
444+
* less than 10^k/2, so we round down to 0 regardless of fractional. */
445+
if (intlen < k) {
446+
char *buf = (char *)PyMem_Malloc(2);
447+
if (buf == NULL) return 0;
448+
buf[0] = '0'; buf[1] = '\0';
449+
*out_digits = buf;
450+
*digits_end = buf + 1;
451+
*decpt = 1;
452+
return 1;
453+
}
454+
455+
int q_len = intlen - k; /* digits of Q_before_rounding; may be 0 */
456+
457+
/* Compare R (the low k digits) against 10^k/2 ("5" + (k-1) zeros). */
458+
int cmp;
459+
{
460+
char r_first = intbuf[q_len];
461+
if (r_first < '5') {
462+
cmp = -1;
463+
}
464+
else if (r_first > '5') {
465+
cmp = 1;
466+
}
467+
else {
468+
cmp = 0;
469+
for (int i = q_len + 1; i < intlen; i++) {
470+
if (intbuf[i] != '0') {
471+
cmp = 1;
472+
break;
473+
}
474+
}
475+
}
476+
}
477+
478+
int round_up;
479+
if (cmp < 0) {
480+
round_up = 0;
481+
}
482+
else if (cmp > 0) {
483+
round_up = 1;
484+
}
485+
else {
486+
/* R == 10^k/2 exactly */
487+
if (frac_nonzero) {
488+
round_up = 1;
489+
}
490+
else {
491+
char q_last = (q_len > 0) ? intbuf[q_len - 1] : '0';
492+
round_up = ((q_last - '0') & 1) ? 1 : 0;
493+
}
494+
}
495+
496+
/* Build Q as a digit string, with room for a possible carry-out. */
497+
char *qbuf = (char *)PyMem_Malloc((size_t)q_len + 2);
498+
if (qbuf == NULL) return 0;
499+
if (q_len == 0) {
500+
qbuf[0] = round_up ? '1' : '0';
501+
qbuf[1] = '\0';
502+
}
503+
else {
504+
memcpy(qbuf, intbuf, (size_t)q_len);
505+
qbuf[q_len] = '\0';
506+
if (round_up) {
507+
int i = q_len - 1;
508+
while (i >= 0 && qbuf[i] == '9') {
509+
qbuf[i] = '0';
510+
i--;
511+
}
512+
if (i >= 0) {
513+
qbuf[i]++;
514+
}
515+
else {
516+
/* Carry propagated past leading digit — prepend '1'. */
517+
memmove(qbuf + 1, qbuf, (size_t)q_len);
518+
qbuf[0] = '1';
519+
qbuf[q_len + 1] = '\0';
520+
}
521+
}
522+
}
523+
int qlen = (int)strlen(qbuf);
524+
525+
/* Special case: rounded value is 0. */
526+
if (qlen == 1 && qbuf[0] == '0') {
527+
*out_digits = qbuf;
528+
*digits_end = qbuf + 1;
529+
*decpt = 1;
530+
return 1;
531+
}
532+
533+
/* value = Q * 10^k with Q's decimal digits = qbuf; decpt = k + qlen.
534+
* Strip trailing zeros from qbuf (decpt is unchanged since the
535+
* represented value is invariant under digits -> digits+"0" with
536+
* exp += 0 per our decpt formula). */
537+
*decpt = k + qlen;
538+
while (qlen > 1 && qbuf[qlen - 1] == '0') {
539+
qlen--;
540+
}
541+
qbuf[qlen] = '\0';
542+
543+
*out_digits = qbuf;
544+
*digits_end = qbuf + qlen;
545+
return 1;
546+
}
547+
343548
/* -------------------------------------------------------------------------
344549
* _PyRyu_dtoa – main entry point
345550
* ------------------------------------------------------------------------- */
@@ -366,7 +571,7 @@ _PyRyu_dtoa(double d, int mode, int ndigits,
366571
* for a total of P+1 significant digits.
367572
* So we pass precision = ndigits - 1. */
368573
int precision = (ndigits > 0) ? ndigits - 1 : 0;
369-
char *buf = (char *)PyMem_Malloc(_PYRYU_D2EXP_BUFSIZE);
574+
char *buf = (char *)PyMem_Malloc(_pyryu_d2exp_bufsize(precision));
370575
if (buf == NULL)
371576
return NULL;
372577
int len = d2exp_buffered_n(d, (uint32_t)precision, buf);
@@ -379,9 +584,15 @@ _PyRyu_dtoa(double d, int mode, int ndigits,
379584
}
380585
case 3: {
381586
/* ndigits digits after the decimal point (fixed-point format).
382-
* ndigits must be >= 0 here (negative case uses _Py_dg_dtoa). */
383-
assert(ndigits >= 0);
384-
char *buf = (char *)PyMem_Malloc(_PYRYU_D2FIXED_BUFSIZE);
587+
* ndigits < 0 means round to the nearest multiple of 10^(-ndigits),
588+
* used by float.__round__ with a negative argument. */
589+
if (ndigits < 0) {
590+
if (!ryu_mode3_neg(d, -ndigits, &out_digits, decpt, sign,
591+
digits_end))
592+
return NULL;
593+
break;
594+
}
595+
char *buf = (char *)PyMem_Malloc(_pyryu_d2fixed_bufsize(ndigits));
385596
if (buf == NULL)
386597
return NULL;
387598
int len = d2fixed_buffered_n(d, (uint32_t)ndigits, buf);

0 commit comments

Comments
 (0)