Skip to content

Commit babb1f3

Browse files
authored
Merge pull request #356 from sleepingeight/surya/opt-cmp
optimize fastfloat_strncasecmp
2 parents fd9cad9 + 4eb0d80 commit babb1f3

File tree

2 files changed

+142
-8
lines changed

2 files changed

+142
-8
lines changed

include/fast_float/float_common.h

Lines changed: 139 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define FASTFLOAT_FLOAT_COMMON_H
33

44
#include <cfloat>
5+
#include <cstddef>
56
#include <cstdint>
67
#include <cassert>
78
#include <cstring>
@@ -267,18 +268,151 @@ struct is_supported_char_type
267268
> {
268269
};
269270

271+
template <typename UC>
272+
inline FASTFLOAT_CONSTEXPR14 bool
273+
fastfloat_strncasecmp3(UC const *actual_mixedcase,
274+
UC const *expected_lowercase) {
275+
uint64_t mask{0};
276+
FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) { mask = 0x2020202020202020; }
277+
else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) {
278+
mask = 0x0020002000200020;
279+
}
280+
else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) {
281+
mask = 0x0000002000000020;
282+
}
283+
else {
284+
return false;
285+
}
286+
287+
uint64_t val1{0}, val2{0};
288+
if (cpp20_and_in_constexpr()) {
289+
for (size_t i = 0; i < 3; i++) {
290+
if ((actual_mixedcase[i] | 32) != expected_lowercase[i]) {
291+
return false;
292+
}
293+
return true;
294+
}
295+
} else {
296+
FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1 || sizeof(UC) == 2) {
297+
::memcpy(&val1, actual_mixedcase, 3 * sizeof(UC));
298+
::memcpy(&val2, expected_lowercase, 3 * sizeof(UC));
299+
val1 |= mask;
300+
val2 |= mask;
301+
return val1 == val2;
302+
}
303+
else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) {
304+
::memcpy(&val1, actual_mixedcase, 2 * sizeof(UC));
305+
::memcpy(&val2, expected_lowercase, 2 * sizeof(UC));
306+
val1 |= mask;
307+
if (val1 != val2) {
308+
return false;
309+
}
310+
return (actual_mixedcase[2] | 32) == (expected_lowercase[2]);
311+
}
312+
else {
313+
return false;
314+
}
315+
}
316+
317+
return true;
318+
}
319+
320+
template <typename UC>
321+
inline FASTFLOAT_CONSTEXPR14 bool
322+
fastfloat_strncasecmp5(UC const *actual_mixedcase,
323+
UC const *expected_lowercase) {
324+
uint64_t mask{0};
325+
uint64_t val1{0}, val2{0};
326+
if (cpp20_and_in_constexpr()) {
327+
for (size_t i = 0; i < 5; i++) {
328+
if ((actual_mixedcase[i] | 32) != expected_lowercase[i]) {
329+
return false;
330+
}
331+
return true;
332+
}
333+
} else {
334+
FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) {
335+
mask = 0x2020202020202020;
336+
::memcpy(&val1, actual_mixedcase, 5 * sizeof(UC));
337+
::memcpy(&val2, expected_lowercase, 5 * sizeof(UC));
338+
val1 |= mask;
339+
val2 |= mask;
340+
return val1 == val2;
341+
}
342+
else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) {
343+
mask = 0x0020002000200020;
344+
::memcpy(&val1, actual_mixedcase, 4 * sizeof(UC));
345+
::memcpy(&val2, expected_lowercase, 4 * sizeof(UC));
346+
val1 |= mask;
347+
if (val1 != val2) {
348+
return false;
349+
}
350+
return (actual_mixedcase[4] | 32) == (expected_lowercase[4]);
351+
}
352+
else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) {
353+
mask = 0x0000002000000020;
354+
::memcpy(&val1, actual_mixedcase, 2 * sizeof(UC));
355+
::memcpy(&val2, expected_lowercase, 2 * sizeof(UC));
356+
val1 |= mask;
357+
if (val1 != val2) {
358+
return false;
359+
}
360+
::memcpy(&val1, actual_mixedcase + 2, 2 * sizeof(UC));
361+
::memcpy(&val2, expected_lowercase + 2, 2 * sizeof(UC));
362+
val1 |= mask;
363+
if (val1 != val2) {
364+
return false;
365+
}
366+
return (actual_mixedcase[4] | 32) == (expected_lowercase[4]);
367+
}
368+
else {
369+
return false;
370+
}
371+
}
372+
373+
return true;
374+
}
375+
270376
// Compares two ASCII strings in a case insensitive manner.
271377
template <typename UC>
272378
inline FASTFLOAT_CONSTEXPR14 bool
273379
fastfloat_strncasecmp(UC const *actual_mixedcase, UC const *expected_lowercase,
274380
size_t length) {
275-
for (size_t i = 0; i < length; ++i) {
276-
UC const actual = actual_mixedcase[i];
277-
if ((actual < 256 ? actual | 32 : actual) != expected_lowercase[i]) {
278-
return false;
381+
uint64_t mask{0};
382+
FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 1) { mask = 0x2020202020202020; }
383+
else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 2) {
384+
mask = 0x0020002000200020;
385+
}
386+
else FASTFLOAT_IF_CONSTEXPR17(sizeof(UC) == 4) {
387+
mask = 0x0000002000000020;
388+
}
389+
else {
390+
return false;
391+
}
392+
393+
if (cpp20_and_in_constexpr()) {
394+
for (size_t i = 0; i < length; i++) {
395+
if ((actual_mixedcase[i] | 32) != expected_lowercase[i]) {
396+
return false;
397+
}
398+
return true;
279399
}
400+
} else {
401+
uint64_t val1{0}, val2{0};
402+
size_t sz{8 / (sizeof(UC))};
403+
for (size_t i = 0; i < length; i += sz) {
404+
val1 = val2 = 0;
405+
sz = std::min(sz, length - i);
406+
::memcpy(&val1, actual_mixedcase + i, sz * sizeof(UC));
407+
::memcpy(&val2, expected_lowercase + i, sz * sizeof(UC));
408+
val1 |= mask;
409+
val2 |= mask;
410+
if (val1 != val2) {
411+
return false;
412+
}
413+
}
414+
return true;
280415
}
281-
return true;
282416
}
283417

284418
#ifndef FLT_EVAL_METHOD

include/fast_float/parse_number.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ from_chars_result_t<UC>
3535
++first;
3636
}
3737
if (last - first >= 3) {
38-
if (fastfloat_strncasecmp(first, str_const_nan<UC>(), 3)) {
38+
if (fastfloat_strncasecmp3(first, str_const_nan<UC>())) {
3939
answer.ptr = (first += 3);
4040
value = minusSign ? -std::numeric_limits<T>::quiet_NaN()
4141
: std::numeric_limits<T>::quiet_NaN();
@@ -54,9 +54,9 @@ from_chars_result_t<UC>
5454
}
5555
return answer;
5656
}
57-
if (fastfloat_strncasecmp(first, str_const_inf<UC>(), 3)) {
57+
if (fastfloat_strncasecmp3(first, str_const_inf<UC>())) {
5858
if ((last - first >= 8) &&
59-
fastfloat_strncasecmp(first + 3, str_const_inf<UC>() + 3, 5)) {
59+
fastfloat_strncasecmp5(first + 3, str_const_inf<UC>() + 3)) {
6060
answer.ptr = first + 8;
6161
} else {
6262
answer.ptr = first + 3;

0 commit comments

Comments
 (0)