@@ -252,6 +252,72 @@ parse_ryu_d2exp_output(const char *ryu_buf, int ryu_len,
252252 return 1 ;
253253}
254254
/* -------------------------------------------------------------------------
 * parse_ryu_d2exp_inplace
 *
 * In-place variant of parse_ryu_d2exp_output: parses exponential-format
 * text of the shape  [-]d[.ddd](e|E)[+-]dd  (or a special value starting
 * with 'n'/'N'/'i'/'I') that lives in a PyMem_Malloc'd buffer, and rewrites
 * that same buffer into a NUL-terminated string of significant digits.
 * No extra heap allocation is performed.
 *
 * Parameters:
 *   buf         Work buffer holding the text.  It must have room for at
 *               least len + 1 bytes: when nothing is dropped (no sign,
 *               no '.', no exponent) the terminator lands at buf[len].
 *   len         Number of valid bytes in buf (no terminator required).
 *   out_digits  Receives buf itself on success.  Ownership passes to the
 *               caller, who must free it.
 *   decpt       Receives the decimal-point position relative to the first
 *               digit, or 9999 for a special (Infinity / NaN) value.
 *   sign        Receives 1 if the text started with '-', else 0.
 *   digits_end  Receives a pointer to the terminating NUL in buf.
 *
 * Returns 1 on success.  Returns 0 if buf is NULL, len is negative, or the
 * text is malformed (no mantissa digits, a non-digit byte in the mantissa
 * or exponent, more than one '.', an exponent marker with no digits, or an
 * absurdly large exponent).  On failure the buffer may have been partially
 * rewritten, it is still owned by the caller, and the output parameters
 * must not be relied upon.
 *
 * Why rewriting in place is safe: every input byte yields at most one
 * output byte, so the write index never overtakes the read pointer, and
 * the exponent suffix sits past the last mantissa write.  The terminator
 * is stored only after the exponent has been fully consumed.
 * ------------------------------------------------------------------------- */
static int
parse_ryu_d2exp_inplace(char *buf, int len,
                        char **out_digits, int *decpt, int *sign,
                        char **digits_end)
{
    /* Far beyond any exponent a double can produce; bounding the
     * accumulator here keeps `exp * 10 + digit` from overflowing int. */
    enum { EXP_LIMIT = 100000 };

    if (buf == NULL || len < 0) {
        return 0;
    }

    char *p = buf;
    char *end = buf + len;

    *sign = 0;
    if (p < end && *p == '-') {
        *sign = 1;
        ++p;
    }

    /* Special values ("nan", "Infinity", ...): drop the sign byte, keep the
     * text verbatim, and flag the result with the decpt sentinel 9999. */
    if (p < end && (*p == 'N' || *p == 'n' || *p == 'I' || *p == 'i')) {
        size_t special_len = (size_t)(end - p);
        if (p != buf) {
            memmove(buf, p, special_len);
        }
        buf[special_len] = '\0';
        *out_digits = buf;
        *digits_end = buf + special_len;
        *decpt = 9999;
        return 1;
    }

    /* Compact the mantissa digits to the front of buf, remembering how
     * many digits preceded the decimal point. */
    int mant_len = 0;
    int dot_pos = -1;
    for (; p < end && *p != 'e' && *p != 'E'; ++p) {
        if (*p == '.') {
            if (dot_pos >= 0) {
                return 0;           /* second decimal point */
            }
            dot_pos = mant_len;
        }
        else if (*p >= '0' && *p <= '9') {
            buf[mant_len++] = *p;
        }
        else {
            return 0;               /* stray byte inside the mantissa */
        }
    }
    if (mant_len == 0) {
        return 0;                   /* nothing to return as digits */
    }
    if (dot_pos < 0) {
        dot_pos = mant_len;         /* no '.', all digits are integral */
    }

    /* Optional exponent: the loop above can only stop early on 'e'/'E'. */
    int exp = 0;
    if (p < end) {
        ++p;
        int exp_sign = 1;
        if (p < end && (*p == '-' || *p == '+')) {
            if (*p == '-') {
                exp_sign = -1;
            }
            ++p;
        }
        if (p == end) {
            return 0;               /* exponent marker without digits */
        }
        for (; p < end; ++p) {
            if (*p < '0' || *p > '9') {
                return 0;
            }
            exp = exp * 10 + (*p - '0');
            if (exp > EXP_LIMIT) {
                return 0;
            }
        }
        exp *= exp_sign;
    }

    *decpt = dot_pos + exp;

    /* Strip trailing zeros but always keep at least one digit, so a zero
     * value comes out as "0". */
    while (mant_len > 1 && buf[mant_len - 1] == '0') {
        --mant_len;
    }

    buf[mant_len] = '\0';
    *out_digits = buf;
    *digits_end = buf + mant_len;
    return 1;
}
320+
255321/* -------------------------------------------------------------------------
256322 * parse_ryu_d2fixed_output
257323 *
@@ -363,6 +429,71 @@ parse_ryu_d2fixed_output(const char *ryu_buf, int ryu_len,
363429 return 1 ;
364430}
365431
/* -------------------------------------------------------------------------
 * parse_ryu_d2fixed_inplace
 *
 * In-place variant of parse_ryu_d2fixed_output: parses fixed-point text of
 * the shape  [-]ddd[.ddd]  (or a special value starting with
 * 'n'/'N'/'i'/'I') that lives in a PyMem_Malloc'd buffer, and rewrites that
 * same buffer into a NUL-terminated digit string with leading zeros
 * removed.  Trailing zeros are kept.  No extra heap allocation is performed.
 *
 * Parameters:
 *   buf         Work buffer holding the text.  It must have room for at
 *               least len + 1 bytes: when nothing is dropped (no sign,
 *               no '.', no leading zeros) the terminator lands at buf[len].
 *   len         Number of valid bytes in buf (no terminator required).
 *   out_digits  Receives buf itself on success.  Ownership passes to the
 *               caller, who must free it.
 *   decpt       Receives the decimal-point position relative to the first
 *               returned digit (negative when the value has zeros right
 *               after the point), 1 for an all-zero value, or 9999 for a
 *               special (Infinity / NaN) value.
 *   sign        Receives 1 if the text started with '-', else 0.
 *   digits_end  Receives a pointer to the terminating NUL in buf.
 *
 * Returns 1 on success.  Returns 0 if buf is NULL, len is negative, or the
 * text is malformed (no digits at all, a byte that is neither a digit nor
 * '.', or more than one '.').  On failure the buffer may have been
 * partially rewritten, it is still owned by the caller, and the output
 * parameters must not be relied upon.
 * ------------------------------------------------------------------------- */
static int
parse_ryu_d2fixed_inplace(char *buf, int len,
                          char **out_digits, int *decpt, int *sign,
                          char **digits_end)
{
    if (buf == NULL || len < 0) {
        return 0;
    }

    char *p = buf;
    char *end = buf + len;

    *sign = 0;
    if (p < end && *p == '-') {
        *sign = 1;
        ++p;
    }

    /* Special values ("nan", "Infinity", ...): drop the sign byte, keep the
     * text verbatim, and flag the result with the decpt sentinel 9999. */
    if (p < end && (*p == 'N' || *p == 'n' || *p == 'I' || *p == 'i')) {
        size_t special_len = (size_t)(end - p);
        if (p != buf) {
            memmove(buf, p, special_len);
        }
        buf[special_len] = '\0';
        *out_digits = buf;
        *digits_end = buf + special_len;
        *decpt = 9999;
        return 1;
    }

    /* Compact every digit to the front of buf (the write index never
     * overtakes the read pointer) and note how many digits were integral. */
    int mant_len = 0;
    int int_digits = -1;
    for (; p < end; ++p) {
        if (*p == '.') {
            if (int_digits >= 0) {
                return 0;           /* second decimal point */
            }
            int_digits = mant_len;
        }
        else if (*p >= '0' && *p <= '9') {
            buf[mant_len++] = *p;
        }
        else {
            return 0;               /* stray byte in the number */
        }
    }
    if (mant_len == 0) {
        return 0;                   /* sign-only or empty input */
    }
    if (int_digits < 0) {
        int_digits = mant_len;      /* no '.', all digits are integral */
    }

    int first_nonzero = 0;
    while (first_nonzero < mant_len && buf[first_nonzero] == '0') {
        ++first_nonzero;
    }

    /* Every digit is zero: collapse to the single digit "0". */
    if (first_nonzero == mant_len) {
        buf[0] = '0';
        buf[1] = '\0';
        *decpt = 1;
        *out_digits = buf;
        *digits_end = buf + 1;
        return 1;
    }

    /* Each stripped leading zero moves the decimal point one place left
     * relative to the first remaining digit. */
    *decpt = int_digits - first_nonzero;

    mant_len -= first_nonzero;
    if (first_nonzero > 0) {
        memmove(buf, buf + first_nonzero, (size_t)mant_len);
    }
    buf[mant_len] = '\0';

    *out_digits = buf;
    *digits_end = buf + mant_len;
    return 1;
}
496+
366497/* -------------------------------------------------------------------------
367498 * ryu_mode3_neg
368499 *
@@ -569,38 +700,68 @@ _PyRyu_dtoa(double d, int mode, int ndigits,
569700 * Gay's mode 2 with ndigits=N gives N significant digits total.
570701 * d2exp with precision=P gives 1 digit before the point and P after,
571702 * for a total of P+1 significant digits.
572- * So we pass precision = ndigits - 1. */
703+ * So we pass precision = ndigits - 1.
704+ *
705+ * Fast path: for typical precision (fits in 256B), Ryu writes to
706+ * a stack buffer and parse_ryu_d2exp_output copies out the small
707+ * mantissa. Slow path: heap-allocate a work buffer, parse it in
708+ * place, transfer ownership to *out_digits — one heap alloc total.
709+ */
573710 int precision = (ndigits > 0 ) ? ndigits - 1 : 0 ;
574- char * buf = (char * )PyMem_Malloc (_pyryu_d2exp_bufsize (precision ));
575- if (buf == NULL )
576- return NULL ;
577- int len = d2exp_buffered_n (d , (uint32_t )precision , buf );
578- int ok = parse_ryu_d2exp_output (buf , len , & out_digits , decpt , sign ,
579- digits_end );
580- PyMem_Free (buf );
581- if (!ok )
582- return NULL ;
711+ size_t need = _pyryu_d2exp_bufsize (precision );
712+ char stack_buf [256 ];
713+ if (need <= sizeof (stack_buf )) {
714+ int len = d2exp_buffered_n (d , (uint32_t )precision , stack_buf );
715+ if (!parse_ryu_d2exp_output (stack_buf , len , & out_digits , decpt ,
716+ sign , digits_end ))
717+ return NULL ;
718+ }
719+ else {
720+ char * buf = (char * )PyMem_Malloc (need );
721+ if (buf == NULL )
722+ return NULL ;
723+ int len = d2exp_buffered_n (d , (uint32_t )precision , buf );
724+ if (!parse_ryu_d2exp_inplace (buf , len , & out_digits , decpt ,
725+ sign , digits_end )) {
726+ PyMem_Free (buf );
727+ return NULL ;
728+ }
729+ }
583730 break ;
584731 }
585732 case 3 : {
586733 /* ndigits digits after the decimal point (fixed-point format).
587734 * ndigits < 0 means round to the nearest multiple of 10^(-ndigits),
588- * used by float.__round__ with a negative argument. */
735+ * used by float.__round__ with a negative argument.
736+ *
737+ * Fast path: for typical precision (fits in 768B — enough for every
738+ * double's integer part plus ≲ 450 fractional digits), use a stack
739+ * buffer + copy-parse. Slow path: heap + in-place parse + steal.
740+ */
589741 if (ndigits < 0 ) {
590742 if (!ryu_mode3_neg (d , - ndigits , & out_digits , decpt , sign ,
591743 digits_end ))
592744 return NULL ;
593745 break ;
594746 }
595- char * buf = (char * )PyMem_Malloc (_pyryu_d2fixed_bufsize (ndigits ));
747+ size_t need = _pyryu_d2fixed_bufsize (ndigits );
748+ char stack_buf [768 ];
749+ if (need <= sizeof (stack_buf )) {
750+ int len = d2fixed_buffered_n (d , (uint32_t )ndigits , stack_buf );
751+ if (!parse_ryu_d2fixed_output (stack_buf , len , & out_digits ,
752+ decpt , sign , digits_end ))
753+ return NULL ;
754+ break ;
755+ }
756+ char * buf = (char * )PyMem_Malloc (need );
596757 if (buf == NULL )
597758 return NULL ;
598759 int len = d2fixed_buffered_n (d , (uint32_t )ndigits , buf );
599- int ok = parse_ryu_d2fixed_output (buf , len , & out_digits , decpt , sign ,
600- digits_end );
601- PyMem_Free (buf );
602- if (!ok )
760+ if (!parse_ryu_d2fixed_inplace (buf , len , & out_digits , decpt , sign ,
761+ digits_end )) {
762+ PyMem_Free (buf );
603763 return NULL ;
764+ }
604765 break ;
605766 }
606767 default :
0 commit comments