gh-NNNN: Wire Python/_fmt and Python/_wuffs into the build

eendebakpt · claude · eendebakpt · commit 57d1c7d67cf6 · 2026-04-21T20:55:02.000+02:00
Adds the C-ABI shims and build rules for the two vendored libraries:

  Python/_fmt/fmt_dtoa.cc    fmt-backed _Py_fmt_dtoa / _Py_fmt_dtoa_free
                             (matches _Py_dg_dtoa's calling convention)
  Python/_wuffs/wuffs_strtod.c  wuffs-backed _Py_wuffs_strtod
                             (matches _Py_dg_strtod's calling convention)

Both are declared in pycore_dtoa.h alongside the legacy _Py_dg_* entry
points; no caller uses them yet — pystrtod.c still routes through dtoa.c.
The later commits swap the call sites and then delete dtoa.c.

Build details:

  * fmt_dtoa.cc compiles with -std=c++17 -fno-exceptions -fno-rtti plus
    FMT_HEADER_ONLY=1 and FMT_OPTIMIZE_SIZE=2. Header-only mode keeps us
    from needing fmt's compiled format.cc for Dragonbox tables and
    locale helpers; OPTIMIZE_SIZE=2 stubs out fmt's std::locale lookup
    so libpython doesn't pull in <locale>.

  * wuffs_strtod.c compiles as C with WUFFS_CONFIG__MODULE__BASE__CORE/
    FLOATCONV/INTCONV defines, so only the floatconv sub-module of the
    3.3 MB amalgamated wuffs source actually gets compiled.

  * LIBS gains -lstdc++ because fmt's allocator references
    std::bad_alloc/std::runtime_error vtables. This is the price of
    using a C++ library in libpython; fmt avoids the locale and
    iostream slices so the runtime dep stays small.

The new .o files land in PYTHON_OBJS (which feeds LIBRARY_OBJS) alongside
Python/dtoa.o — both are live in this commit so libpython has everything it
needs whichever path the next commits take.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
diff --git a/Include/internal/pycore_dtoa.h b/Include/internal/pycore_dtoa.h
@@ -33,6 +33,16 @@ extern void _Py_dg_freedtoa(char *s);
 extern PyStatus _PyDtoa_Init(PyInterpreterState *interp);
 extern void _PyDtoa_Fini(PyInterpreterState *interp);
 
+// Replacements for the two David Gay entry points above. _Py_fmt_dtoa is
+// backed by Python/_fmt/ (a vendored trim of fmtlib), _Py_wuffs_strtod by
+// Python/_wuffs/ (a vendored trim of Wuffs). Calling conventions match
+// _Py_dg_dtoa / _Py_dg_strtod so pystrtod.c can swap them in unchanged.
+// Strings returned by _Py_fmt_dtoa must be released with _Py_fmt_dtoa_free.
+extern char* _Py_fmt_dtoa(double d, int mode, int ndigits,
+                          int *decpt, int *sign, char **rve);
+extern void _Py_fmt_dtoa_free(char *s);
+extern double _Py_wuffs_strtod(const char *nptr, char **endptr);
+
 
 #ifdef __cplusplus
 }
diff --git a/Makefile.pre.in b/Makefile.pre.in
@@ -294,7 +294,13 @@ PY_ENABLE_SHARED=	@PY_ENABLE_SHARED@
 STATIC_LIBPYTHON=	@STATIC_LIBPYTHON@
 
 
-LIBS=		@LIBS@
+LIBS=		@LIBS@ -lstdc++
+# -lstdc++ is required because Python/_fmt/fmt_dtoa.cc (the float-to-string
+# shim backed by fmtlib) references std::bad_alloc / std::runtime_error
+# vtables. This is the price of using a C++ library in libpython; fmt takes
+# care to avoid the locale and iostream slices (FMT_OPTIMIZE_SIZE=2) so the
+# runtime dep is small. NOTE(review): hard-coded; toolchains whose C++ runtime
+# is not libstdc++ (e.g. libc++) likely need a configure.ac check instead.
 LIBM=		@LIBM@
 LIBC=		@LIBC@
 SYSLIBS=	$(LIBM) $(LIBC)
@@ -508,6 +514,8 @@ PYTHON_OBJS=	\
 		Python/pystrtod.o \
 		Python/pystrhex.o \
 		Python/dtoa.o \
+		Python/_fmt/fmt_dtoa.o \
+		Python/_wuffs/wuffs_strtod.o \
 		Python/fileutils.o \
 		Python/suggestions.o \
 		Python/perf_trampoline.o \
@@ -3230,6 +3238,25 @@ regen-jit:
 Python/dtoa.o: Python/dtoa.c
 	$(CC) -c $(PY_CORE_CFLAGS) $(CFLAGS_ALIASING) -o $@ $<
 
+# Vendored {fmt} float-to-string path — see Python/_fmt/README.vendor.
+# Compiled as C++17 with exceptions and RTTI disabled to keep the libstdc++
+# runtime dependency small (the link still needs -lstdc++; see LIBS).
+# PY_CORE_CFLAGS carries C-only flags (-std=c11, -Wstrict-prototypes,
+# -Werror=implicit-function-declaration) that C++ compilers warn about, so
+# strip them. The include path is $(srcdir)-relative for out-of-tree builds.
+_FMT_CXX_CFLAGS = $(filter-out -std=c11 -std=c99 -Werror=implicit-function-declaration -Wstrict-prototypes,$(PY_CORE_CFLAGS))
+Python/_fmt/fmt_dtoa.o: Python/_fmt/fmt_dtoa.cc Python/_fmt/format.h \
+		Python/_fmt/format-inl.h Python/_fmt/base.h
+	$(CXX) -c $(_FMT_CXX_CFLAGS) -std=c++17 -fno-exceptions -fno-rtti \
+		-I$(srcdir)/Python/_fmt -o $@ $<
+
+# Vendored Wuffs string-to-float path — see Python/_wuffs/README.vendor.
+# wuffs-v0.4.c is 3.3 MB of amalgamated C; WUFFS_CONFIG__MODULE__BASE__*
+# defines in wuffs_strtod.c prune it. -I is $(srcdir)-based for VPATH builds.
+Python/_wuffs/wuffs_strtod.o: Python/_wuffs/wuffs_strtod.c \
+		Python/_wuffs/wuffs-v0.4.c
+	$(CC) -c $(PY_CORE_CFLAGS) -I$(srcdir)/Python/_wuffs -o $@ $<
+
 Python/ceval.o: Python/ceval.c
 	$(CC) -c $(PY_CORE_CFLAGS) $(CFLAGS_CEVAL) -o $@ $<
 
diff --git a/Python/_fmt/fmt_dtoa.cc b/Python/_fmt/fmt_dtoa.cc
@@ -0,0 +1,154 @@
+// Bridge between {fmt}'s float formatting and the _Py_dg_dtoa calling
+// convention used throughout Python/pystrtod.c and Objects/floatobject.c.
+//
+// Exposes the extern "C" entry point _Py_fmt_dtoa, with the exact shape of
+// _Py_dg_dtoa (David Gay's dtoa in Python/dtoa.c) so existing call sites can
+// swap with a one-line change, plus its deallocator _Py_fmt_dtoa_free.
+//
+//   mode 0 -> shortest round-trip decimal (uses fmt::detail::dragonbox)
+//   mode 2 -> max(1, ndigits) significant digits (uses format_float with
+//             presentation_type::exp and precision = N - 1)
+//   mode 3 -> ndigits digits past the decimal point, ndigits may be negative
+//             (uses format_float with presentation_type::fixed)
+//
+// Trailing zeros are stripped so the digit string + decpt shape matches
+// _Py_dg_dtoa byte-for-byte. The "rounded-to-zero-at-target-precision" case
+// for mode 3 is normalised from fmt's "one '0' digit" output to cpython's
+// empty-digit + decpt = -ndigits convention.
+//
+// Memory model: the returned char* is allocated with PyMem_Malloc and must be
+// freed via _Py_fmt_dtoa_free (mirrors _Py_dg_freedtoa).
+
+// Build fmt in header-only mode so we don't need a separate compiled
+// format.cc TU for the Dragonbox tables and the `write_fixed` locale hooks.
+// FMT_OPTIMIZE_SIZE >= 2 disables fmt's std::locale-based locale lookup,
+// which cpython's dtoa never uses anyway and which would otherwise drag
+// libstdc++'s <locale> into libpython.
+#define FMT_HEADER_ONLY 1
+#define FMT_OPTIMIZE_SIZE 2
+
+#include "format.h"
+#include "format-inl.h"  // brings in out-of-line dragonbox tables; FMT_FUNC
+                         // resolves to `inline` in header-only mode.
+
+#include <Python.h>
+
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+
+extern "C" {
+
+void _Py_fmt_dtoa_free(char *s) {  // releases a string from _Py_fmt_dtoa
+    PyMem_Free(s);                 // pairs with PyMem_Malloc in this file
+}
+
+static char *alloc_copy(const char *src, size_t n) {  // NUL-terminated copy
+    void *raw = PyMem_Malloc(n + 1);                  // +1 for the terminator
+    if (raw == nullptr) return nullptr;               // OOM: caller checks
+    char *dup = static_cast<char *>(std::memcpy(raw, src, n));
+    dup[n] = '\0';
+    return dup;
+}
+
+char *_Py_fmt_dtoa(double d, int mode, int ndigits,
+                   int *decpt, int *sign, char **rve) {
+    // Digits of |d| in _Py_dg_dtoa style; *sign = 1 if sign bit set (-0.0 too).
+    *sign = std::signbit(d) ? 1 : 0;
+    double abs_d = *sign ? -d : d;
+
+    // Infinity / NaN / zero: fixed strings built here, no fmt involvement.
+    if (std::isinf(abs_d)) {
+        *decpt = 9999;  // same decpt value is used for NaN below
+        char *out = alloc_copy("Infinity", 8);
+        if (!out) return nullptr;  // allocation failed
+        if (rve) *rve = out + 8;
+        return out;
+    }
+    if (std::isnan(abs_d)) {
+        *decpt = 9999;
+        char *out = alloc_copy("NaN", 3);
+        if (!out) return nullptr;
+        if (rve) *rve = out + 3;
+        return out;
+    }
+    if (abs_d == 0.0) {
+        *decpt = 1;
+        char *out = alloc_copy("0", 1);
+        if (!out) return nullptr;
+        if (rve) *rve = out + 1;
+        return out;
+    }
+
+    // Mode 0: shortest round-trip digits straight from Dragonbox.
+    if (mode == 0) {
+        auto r = fmt::detail::dragonbox::to_decimal(abs_d);
+        char tmp[32];
+        int n = std::snprintf(tmp, sizeof(tmp), "%llu",
+                              (unsigned long long)r.significand);
+        int exp = r.exponent;
+        while (n > 1 && tmp[n - 1] == '0') { --n; ++exp; }  // strip zeros
+        *decpt = exp + n;
+        char *out = alloc_copy(tmp, (size_t)n);
+        if (!out) return nullptr;
+        if (rve) *rve = out + n;  // rve, when given, points at the NUL
+        return out;
+    }
+
+    // Modes 2 and 3: fmt::detail::format_float writes digits (no dot, no
+    // sign, no exponent) into a buffer and returns the decimal exponent.
+    fmt::format_specs specs;
+    int precision;
+    bool fixed;
+    if (mode == 2) {
+        specs.set_type(fmt::presentation_type::exp);
+        // max(1, ndigits) significant digits, fmt wants precision = N - 1.
+        precision = (ndigits < 1 ? 1 : ndigits) - 1;
+        fixed = false;
+    } else {
+        // Any other mode is handled as mode 3; ndigits may be negative here.
+        specs.set_type(fmt::presentation_type::fixed);
+        precision = ndigits;  // NOTE(review): fmt may read < 0 as "shortest"
+        fixed = true;
+    }
+
+    fmt::memory_buffer buf;  // NOTE(review): OOM path with -fno-exceptions?
+    int exp = fmt::detail::format_float(abs_d, precision, specs,
+                                        /*binary32=*/false, buf);
+    const char *bd = buf.data();
+    int n = static_cast<int>(buf.size());
+
+    // Normalise fmt's rounded-to-zero output to cpython's "no_digits" shape:
+    // fmt may emit a single '0' (e.g. round(5, -1) == 0); cpython returns an
+    // empty digit string with decpt = -ndigits.
+    if (fixed && n == 1 && bd[0] == '0') {
+        char *out = static_cast<char *>(PyMem_Malloc(1));
+        if (!out) return nullptr;
+        out[0] = '\0';
+        *decpt = -ndigits;
+        if (rve) *rve = out;
+        return out;
+    }
+    // Same empty-string result when fmt wrote no digits at all (presumably
+    // because the whole value rounded away at the requested precision).
+    if (fixed && n == 0) {
+        char *out = static_cast<char *>(PyMem_Malloc(1));
+        if (!out) return nullptr;
+        out[0] = '\0';
+        *decpt = -ndigits;
+        if (rve) *rve = out;
+        return out;
+    }
+
+    // Strip trailing zeros (cpython mode-2/3 invariant).
+    int nz = n;
+    while (nz > 1 && bd[nz - 1] == '0') --nz;
+    *decpt = exp + n;                    // decpt uses pre-strip length
+    char *out = alloc_copy(bd, (size_t)nz);
+    if (!out) return nullptr;
+    if (rve) *rve = out + nz;
+    return out;
+}
+
+}  // extern "C"
diff --git a/Python/_wuffs/wuffs_strtod.c b/Python/_wuffs/wuffs_strtod.c
@@ -0,0 +1,134 @@
+// Bridge between Wuffs's wuffs_base__parse_number_f64 and the _Py_dg_strtod
+// calling convention used by Python/pystrtod.c.
+//
+// _Py_dg_strtod's contract (from the David Gay dtoa that used to live in
+// Python/dtoa.c):
+//
+//   double _Py_dg_strtod(const char *nptr, char **endptr);
+//
+//   * Skips leading whitespace (like the C standard strtod).
+//   * Consumes an optional sign, a decimal mantissa (with optional '.'),
+//     and an optional 'e'/'E' exponent.
+//   * Does NOT accept infinities or NaNs — the caller in pystrtod.c falls
+//     back to _Py_parse_inf_or_nan when we don't consume anything.
+//   * On success *endptr points past the last consumed character.
+//   * On parse failure *endptr == nptr, returns 0.
+//   * On overflow returns +/-HUGE_VAL and sets errno = ERANGE.
+//   * On underflow returns the nearest representable value (possibly 0) and
+//     sets errno = ERANGE.
+//
+// Wuffs is stricter: wuffs_base__parse_number_f64 requires the whole slice
+// to be consumed, returns an in-band status, and does not touch errno. So
+// the shim has to:
+//
+//   (1) scan the ASCII tail itself to find where the numeric syntax ends,
+//   (2) hand wuffs just that slice,
+//   (3) translate wuffs's result (including inf-on-overflow) back into the
+//       strtod errno discipline.
+
+// Compile only the base/floatconv path of Wuffs into this TU. With these
+// defines set (before the include), the preprocessor prunes every other
+// module (image codecs, JSON, compression, ...), cutting a ~3.3 MB source
+// down to ~80-100 KB of object code. `WUFFS_IMPLEMENTATION` activates
+// function definitions alongside declarations.
+#define WUFFS_IMPLEMENTATION
+#define WUFFS_CONFIG__STATIC_FUNCTIONS
+#define WUFFS_CONFIG__MODULES
+#define WUFFS_CONFIG__MODULE__BASE
+#define WUFFS_CONFIG__MODULE__BASE__CORE
+#define WUFFS_CONFIG__MODULE__BASE__FLOATCONV
+#define WUFFS_CONFIG__MODULE__BASE__INTCONV
+#include "wuffs-v0.4.c"
+
+#include <Python.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <math.h>
+#include <stddef.h>
+#include <string.h>
+
+// Scan forward from `p` and return a pointer to the first character that
+// isn't part of a valid strtod-style numeric literal (the caller has already
+// stepped past any sign). Returns `p` itself when no digits were found — the
+// caller uses that to signal "parse failure, don't consume".
+static const char *
+scan_number_end(const char *p)
+{
+    // Mantissa: integer digits, then an optional '.' and fraction digits.
+    const char *cur = p;
+    while (isdigit((unsigned char)*cur)) cur++;
+    size_t ndigits = (size_t)(cur - p);
+    if (*cur == '.') {
+        const char *frac = ++cur;
+        while (isdigit((unsigned char)*cur)) cur++;
+        ndigits += (size_t)(cur - frac);
+    }
+    if (ndigits == 0) return p;         // "", "." or junk: nothing consumed
+    if (*cur != 'e' && *cur != 'E') return cur;
+    // Exponent: marker, optional sign, then at least one digit is required.
+    const char *tail = cur + 1;
+    if (*tail == '+' || *tail == '-') tail++;
+    const char *exp_digits = tail;
+    while (isdigit((unsigned char)*tail)) tail++;
+    return tail > exp_digits ? tail : cur;  // no digits: stop before 'e'
+}
+
+// Parse a decimal float literal with the _Py_dg_strtod calling convention.
+// Success: returns the value, *endptr (if non-NULL) = end of consumed text.
+// Failure: returns 0.0 with *endptr = nptr. errno is set to ERANGE when wuffs
+// yields +/-inf or when a non-zero mantissa parses as zero.
+double
+_Py_wuffs_strtod(const char *nptr, char **endptr)
+{
+    const char *p = nptr;
+    // Leading whitespace (strtod semantics).
+    while (isspace((unsigned char)*p)) ++p;
+
+    const char *sign_start = p;
+    if (*p == '+' || *p == '-') ++p;
+
+    const char *digits_start = p;
+    const char *digits_end = scan_number_end(p);
+    if (digits_end == digits_start) {
+        // No numeric content; pystrtod.c then tries _Py_parse_inf_or_nan.
+        if (endptr) *endptr = (char *)nptr;
+        return 0.0;
+    }
+
+    // Hand wuffs the [sign_start, digits_end) slice, sign included. Wuffs
+    // rejects leading zeros by default (e.g. "00.7"), hence
+    // ALLOW_MULTIPLE_LEADING_ZEROES; REJECT_INF_AND_NAN mirrors _Py_dg_strtod.
+    wuffs_base__slice_u8 slice = wuffs_base__make_slice_u8(
+        (uint8_t *)sign_start, (size_t)(digits_end - sign_start));
+    uint32_t options =
+        WUFFS_BASE__PARSE_NUMBER_XXX__ALLOW_MULTIPLE_LEADING_ZEROES
+        | WUFFS_BASE__PARSE_NUMBER_FXX__REJECT_INF_AND_NAN;
+
+    wuffs_base__result_f64 r = wuffs_base__parse_number_f64(slice, options);
+    if (r.status.repr != NULL) {
+        if (endptr) *endptr = (char *)nptr;
+        return 0.0;
+    }
+
+    if (endptr) *endptr = (char *)digits_end;
+
+    // Overflow: wuffs returns +/-inf silently; strtod convention is
+    // HUGE_VAL + errno=ERANGE.
+    if (isinf(r.value)) {
+        errno = ERANGE;
+    }
+    // Underflow: value is zero but the mantissa has a non-zero digit. Stop at
+    // the exponent marker so "0e5" (non-zero exponent digit) is not flagged.
+    else if (r.value == 0.0) {
+        for (const char *q = digits_start;
+             q < digits_end && *q != 'e' && *q != 'E'; ++q) {
+            if (*q >= '1' && *q <= '9') {
+                errno = ERANGE;
+                break;
+            }
+        }
+    }
+
+    return r.value;
+}