Skip to content

Commit 3044c9b

Browse files
authored
Merge pull request #387 from fastfloat/pr386
Using unlikely markers for PR386
2 parents 6258cbc + 29bd115 commit 3044c9b

3 files changed

Lines changed: 128 additions & 28 deletions

File tree

include/fast_float/ascii_number.h

Lines changed: 40 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,17 @@ report_parse_error(UC const *p, parse_error error) {
330330

331331
// Assuming that you use no more than 19 digits, this will
332332
// parse an ASCII string.
333+
//
334+
// store_spans is a *runtime* flag (not a template parameter, deliberately: a
335+
// template would create a second instantiation of this whole function and the
336+
// extra icache pressure wipes out the gain). When false, the integer/fraction
337+
// spans (read only by the rare digit_comp slow path) are not materialized,
338+
// which keeps the fat parsed_number_string_t off the hot path. The caller
339+
// re-parses with store_spans=true if the slow path is actually reached.
333340
template <bool basic_json_fmt, typename UC>
334341
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
335-
parse_number_string(UC const *p, UC const *pend,
336-
parse_options_t<UC> options) noexcept {
342+
parse_number_string(UC const *p, UC const *pend, parse_options_t<UC> options,
343+
bool store_spans = true) noexcept {
337344
chars_format const fmt = detail::adjust_for_feature_macros(options.format);
338345
UC const decimal_point = options.decimal_point;
339346

@@ -402,7 +409,9 @@ parse_number_string(UC const *p, UC const *pend,
402409
}
403410
UC const *const end_of_integer_part = p;
404411
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
405-
answer.integer = span<UC const>(start_digits, size_t(digit_count));
412+
if (store_spans) {
413+
answer.integer = span<UC const>(start_digits, size_t(digit_count));
414+
}
406415
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
407416
// at least 1 digit in integer part, without leading zeros
408417
if (digit_count == 0) {
@@ -429,7 +438,9 @@ parse_number_string(UC const *p, UC const *pend,
429438
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
430439
}
431440
exponent = before - p;
432-
answer.fraction = span<UC const>(before, size_t(p - before));
441+
if (store_spans) {
442+
answer.fraction = span<UC const>(before, size_t(p - before));
443+
}
433444
digit_count -= exponent;
434445
}
435446
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
@@ -514,29 +525,35 @@ parse_number_string(UC const *p, UC const *pend,
514525

515526
if (digit_count > 19) {
516527
answer.too_many_digits = true;
517-
// Let us start again, this time, avoiding overflows.
518-
// We don't need to call if is_integer, since we use the
519-
// pre-tokenized spans from above.
520-
i = 0;
521-
p = answer.integer.ptr;
522-
UC const *int_end = p + answer.integer.len();
523-
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
524-
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
525-
i = i * 10 + uint64_t(*p - UC('0'));
526-
++p;
527-
}
528-
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
529-
exponent = end_of_integer_part - p + exp_number;
530-
} else { // We have a value with a fractional component.
531-
p = answer.fraction.ptr;
532-
UC const *frac_end = p + answer.fraction.len();
533-
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
528+
// The truncation recompute below reads the integer/fraction spans. When
529+
// store_spans is false we didn't materialize them, so just flag
530+
// too_many_digits; the caller re-parses with store_spans=true to obtain
531+
// the corrected mantissa/exponent before taking the slow path.
532+
if (store_spans) {
533+
// Let us start again, this time, avoiding overflows.
534+
// We don't need to call is_integer, since we use the
535+
// pre-tokenized spans from above.
536+
i = 0;
537+
p = answer.integer.ptr;
538+
UC const *int_end = p + answer.integer.len();
539+
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
540+
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
534541
i = i * 10 + uint64_t(*p - UC('0'));
535542
++p;
536543
}
537-
exponent = answer.fraction.ptr - p + exp_number;
544+
if (i >= minimal_nineteen_digit_integer) { // We have a big integer
545+
exponent = end_of_integer_part - p + exp_number;
546+
} else { // We have a value with a fractional component.
547+
p = answer.fraction.ptr;
548+
UC const *frac_end = p + answer.fraction.len();
549+
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
550+
i = i * 10 + uint64_t(*p - UC('0'));
551+
++p;
552+
}
553+
exponent = answer.fraction.ptr - p + exp_number;
554+
}
555+
// We have now corrected both exponent and i, to a truncated value
538556
}
539-
// We have now corrected both exponent and i, to a truncated value
540557
}
541558
}
542559
answer.exponent = exponent;

include/fast_float/float_common.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,28 @@ using parse_options = parse_options_t<char>;
197197
#define fastfloat_really_inline inline __attribute__((always_inline))
198198
#endif
199199

200+
// Branch-probability hint marking the rare slow-path branches as cold, so the
201+
// optimizer keeps the out-of-line slow-path re-parse off the hot path (and does
202+
// not duplicate the force-inlined hot scanner into the caller, which bloated
203+
// the hot frame and hurt ILP on some targets). Used at the call site as
204+
// if fastfloat_unlikely(cond) { ... }
205+
// (the macro supplies the parentheses). It expands to the standard [[unlikely]]
206+
// attribute when supported, otherwise to __builtin_expect on GCC/Clang, or
207+
// to a no-op elsewhere (e.g. pre-C++20 MSVC, which has no equivalent hint).
208+
#ifdef __has_cpp_attribute
209+
#if __has_cpp_attribute(unlikely) >= 201803L
210+
#define FASTFLOAT_USE_UNLIKELY_ATTR 1
211+
#endif
212+
#endif
213+
214+
#ifdef FASTFLOAT_USE_UNLIKELY_ATTR
215+
#define fastfloat_unlikely(x) (x) [[unlikely]]
216+
#elif defined(__GNUC__) || defined(__clang__)
217+
#define fastfloat_unlikely(x) (__builtin_expect(!!(x), 0))
218+
#else
219+
#define fastfloat_unlikely(x) (x)
220+
#endif
221+
200222
#ifndef FASTFLOAT_ASSERT
201223
#define FASTFLOAT_ASSERT(x) \
202224
{ ((void)(x)); }

include/fast_float/parse_number.h

Lines changed: 66 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,23 @@ from_chars_advanced(parsed_number_string_t<UC> &pns, T &value) noexcept {
289289
return answer;
290290
}
291291

292+
// Slow path: re-parse materializing the integer/fraction spans the hot no-span
293+
// parse skipped, then run the full algorithm. The two callers reach it only
294+
// through a fastfloat_unlikely branch, so the optimizer keeps this re-parse off
295+
// the hot path on its own (no function-level noinline needed).
296+
// from_chars_advanced already handles both the too_many_digits disambiguation
297+
// and the am.power2<0 digit_comp recompute, so both slow branches collapse to
298+
// one helper call.
299+
template <typename T, typename UC>
300+
FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
301+
parse_number_slow_path(UC const *first, UC const *last, T &value,
302+
parse_options_t<UC> options, bool bjf) noexcept {
303+
parsed_number_string_t<UC> pns =
304+
bjf ? parse_number_string<true, UC>(first, last, options, true)
305+
: parse_number_string<false, UC>(first, last, options, true);
306+
return from_chars_advanced(pns, value);
307+
}
308+
292309
template <typename T, typename UC>
293310
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t<UC>
294311
from_chars_float_advanced(UC const *first, UC const *last, T &value,
@@ -312,10 +329,15 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
312329
answer.ptr = first;
313330
return answer;
314331
}
332+
bool const bjf = uint64_t(fmt & detail::basic_json_fmt) != 0;
333+
334+
// Fast path: parse WITHOUT materializing the integer/fraction spans (read
335+
// only by the rare slow paths). Skipping their stores keeps the fat
336+
// parsed_number_string_t off the hot path. store_spans is a runtime argument,
337+
// so this reuses the single parse_number_string instantiation.
315338
parsed_number_string_t<UC> pns =
316-
uint64_t(fmt & detail::basic_json_fmt)
317-
? parse_number_string<true, UC>(first, last, options)
318-
: parse_number_string<false, UC>(first, last, options);
339+
bjf ? parse_number_string<true, UC>(first, last, options, false)
340+
: parse_number_string<false, UC>(first, last, options, false);
319341
if (!pns.valid) {
320342
if (uint64_t(fmt & chars_format::no_infnan)) {
321343
answer.ec = std::errc::invalid_argument;
@@ -326,8 +348,47 @@ from_chars_float_advanced(UC const *first, UC const *last, T &value,
326348
}
327349
}
328350

329-
// call overload that takes parsed_number_string_t directly.
330-
return from_chars_advanced(pns, value);
351+
// Slow path A (rare): > 19 significant digits. The no-span parse left the
352+
// mantissa un-truncated and skipped the span-based recompute; the cold helper
353+
// re-parses with spans and runs the full algorithm.
354+
//
355+
// We have to disable -Wc++20-extensions for the [[unlikely]] attribute
356+
// See comment for @jwakely at
357+
// https://github.com/fastfloat/fast_float/pull/387#discussion_r3366943539
358+
// This is unfortunate.
359+
#ifdef __clang__
360+
#pragma clang diagnostic push
361+
#pragma clang diagnostic ignored "-Wc++20-extensions"
362+
#endif
363+
if fastfloat_unlikely (pns.too_many_digits) {
364+
return parse_number_slow_path<T, UC>(first, last, value, options, bjf);
365+
}
366+
answer.ec = std::errc(); // be optimistic
367+
answer.ptr = pns.lastmatch;
368+
369+
if (clinger_fast_path_impl(pns.mantissa, pns.exponent, pns.negative, value)) {
370+
return answer;
371+
}
372+
373+
adjusted_mantissa am =
374+
compute_float<binary_format<T>>(pns.exponent, pns.mantissa);
375+
// Slow path B (rare): Eisel-Lemire could not resolve; digit_comp needs the
376+
// integer/fraction spans. Route to the cold helper (clinger there is a
377+
// no-op since it already failed here; the cold re-parse + digit_comp
378+
// via from_chars_advanced reproduces this branch).
379+
if fastfloat_unlikely (am.power2 < 0) {
380+
return parse_number_slow_path<T, UC>(first, last, value, options, bjf);
381+
}
382+
#ifdef __clang__
383+
#pragma clang diagnostic pop
384+
#endif
385+
to_float(pns.negative, am, value);
386+
// Test for over/underflow.
387+
if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) ||
388+
am.power2 == binary_format<T>::infinite_power()) {
389+
answer.ec = std::errc::result_out_of_range;
390+
}
391+
return answer;
331392
}
332393

333394
template <typename T, typename UC, typename>

0 commit comments

Comments
 (0)