Skip to content

Commit 4abcd60

Browse files
committed
Implement digit separator skipping in number parsing
1 parent 70aa9a8 commit 4abcd60

1 file changed

Lines changed: 50 additions & 10 deletions

File tree

include/fast_float/ascii_number.h

Lines changed: 50 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -353,50 +353,78 @@ parse_number_string(UC const *p, UC const *pend,
353353
UC const *const start_digits = p;
354354

355355
uint64_t i = 0; // an unsigned int avoids signed overflows (which are bad)
356+
int64_t digit_count = 0;
356357

357-
while ((p != pend) && is_integer(*p)) {
358+
while (p != pend) {
359+
if (options.digit_separator != UC('\0') && *p == options.digit_separator) {
360+
++p;
361+
continue;
362+
}
363+
if (!is_integer(*p)) {
364+
break;
365+
}
358366
// a multiplication by 10 is cheaper than an arbitrary integer
359367
// multiplication
360368
i = 10 * i +
361369
uint64_t(*p -
362370
UC('0')); // might overflow, we will handle the overflow later
363371
++p;
372+
++digit_count;
364373
}
365374
UC const *const end_of_integer_part = p;
366-
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
367-
answer.integer = span<UC const>(start_digits, size_t(digit_count));
375+
answer.integer =
376+
span<UC const>(start_digits, size_t(end_of_integer_part - start_digits));
368377
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
369378
// at least 1 digit in integer part, without leading zeros
370379
if (digit_count == 0) {
371380
return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
372381
}
373-
if ((start_digits[0] == UC('0') && digit_count > 1)) {
382+
UC const *first_digit = start_digits;
383+
while (first_digit != end_of_integer_part &&
384+
options.digit_separator != UC('\0') &&
385+
*first_digit == options.digit_separator) {
386+
++first_digit;
387+
}
388+
if (first_digit != end_of_integer_part && *first_digit == UC('0') &&
389+
digit_count > 1) {
374390
return report_parse_error<UC>(start_digits,
375391
parse_error::leading_zeros_in_integer_part);
376392
}
377393
}
378394

379395
int64_t exponent = 0;
396+
int64_t fractional_digit_count = 0;
380397
bool const has_decimal_point = (p != pend) && (*p == decimal_point);
381398
if (has_decimal_point) {
382399
++p;
383400
UC const *before = p;
384401
// can occur at most twice without overflowing, but let it occur more, since
385402
// for integers with many digits, digit parsing is the primary bottleneck.
386-
loop_parse_if_eight_digits(p, pend, i);
403+
if (options.digit_separator == UC('\0')) {
404+
loop_parse_if_eight_digits(p, pend, i);
405+
}
387406

388-
while ((p != pend) && is_integer(*p)) {
407+
while (p != pend) {
408+
if (options.digit_separator != UC('\0') &&
409+
*p == options.digit_separator) {
410+
++p;
411+
continue;
412+
}
413+
if (!is_integer(*p)) {
414+
break;
415+
}
389416
uint8_t digit = uint8_t(*p - UC('0'));
390417
++p;
391418
i = i * 10 + digit; // in rare cases, this will overflow, but that's ok
419+
++fractional_digit_count;
392420
}
393-
exponent = before - p;
421+
exponent = -fractional_digit_count;
394422
answer.fraction = span<UC const>(before, size_t(p - before));
395-
digit_count -= exponent;
423+
digit_count += fractional_digit_count;
396424
}
397425
FASTFLOAT_IF_CONSTEXPR17(basic_json_fmt) {
398426
// at least 1 digit in fractional part
399-
if (has_decimal_point && exponent == 0) {
427+
if (has_decimal_point && fractional_digit_count == 0) {
400428
return report_parse_error<UC>(p,
401429
parse_error::no_digits_in_fractional_part);
402430
}
@@ -467,7 +495,9 @@ parse_number_string(UC const *p, UC const *pend,
467495
// We need to be mindful of the case where we only have zeroes...
468496
// E.g., 0.000000000...000.
469497
UC const *start = start_digits;
470-
while ((start != pend) && (*start == UC('0') || *start == decimal_point)) {
498+
while ((start != pend) && (*start == UC('0') || *start == decimal_point ||
499+
(options.digit_separator != UC('\0') &&
500+
*start == options.digit_separator))) {
471501
if (*start == UC('0')) {
472502
digit_count--;
473503
}
@@ -484,6 +514,11 @@ parse_number_string(UC const *p, UC const *pend,
484514
UC const *int_end = p + answer.integer.len();
485515
uint64_t const minimal_nineteen_digit_integer{1000000000000000000};
486516
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
517+
if (options.digit_separator != UC('\0') &&
518+
*p == options.digit_separator) {
519+
++p;
520+
continue;
521+
}
487522
i = i * 10 + uint64_t(*p - UC('0'));
488523
++p;
489524
}
@@ -493,6 +528,11 @@ parse_number_string(UC const *p, UC const *pend,
493528
p = answer.fraction.ptr;
494529
UC const *frac_end = p + answer.fraction.len();
495530
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
531+
if (options.digit_separator != UC('\0') &&
532+
*p == options.digit_separator) {
533+
++p;
534+
continue;
535+
}
496536
i = i * 10 + uint64_t(*p - UC('0'));
497537
++p;
498538
}

0 commit comments

Comments
 (0)