Skip to content

Commit e48fd5c

Browse files
committed
Optimize base64_decode validation using lookup table
1 parent 4eca507 commit e48fd5c

File tree

1 file changed

+14
-8
lines changed

1 file changed

+14
-8
lines changed

cpp/src/arrow/vendored/base64.cpp

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
*/
3131

3232
#include "arrow/util/base64.h"
33+
#include <array>
3334
#include <iostream>
3435

3536
namespace arrow {
@@ -40,6 +41,17 @@ static const std::string base64_chars =
4041
"abcdefghijklmnopqrstuvwxyz"
4142
"0123456789+/";
4243

44+
// Reverse lookup table used when decoding: for every possible byte value it
// holds that byte's position in base64_chars, or -1 when the byte is not part
// of the alphabet. This replaces a linear std::string::find per input
// character with a single array access.
//
// The table is built once during static initialization. base64_chars is
// defined earlier in this translation unit, so it is already constructed when
// this initializer runs.
static const std::array<int8_t, 256> kBase64Lookup = [] {
  std::array<int8_t, 256> table{};
  table.fill(-1);

  for (size_t i = 0; i < base64_chars.size(); ++i) {
    // Subscript through uint8_t so a signed char can never produce a negative
    // index. The explicit cast on the right-hand side makes the size_t ->
    // int8_t narrowing intentional instead of an implicit conversion that
    // warns under MSVC (C4267) / -Wconversion.
    // NOTE(review): assumes the alphabet has at most 128 entries so every
    // position fits in int8_t -- confirm against the base64_chars definition.
    table[static_cast<uint8_t>(base64_chars[i])] = static_cast<int8_t>(i);
  }

  return table;
}();
54+
4355
static std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
4456
std::string ret;
4557
int i = 0;
@@ -119,22 +131,16 @@ Result<std::string> base64_decode(std::string_view encoded_string) {
119131
return Status::Invalid("Invalid base64 input: padding in wrong position");
120132
}
121133

122-
if (base64_chars.find(c) == std::string::npos) {
134+
if (kBase64Lookup[static_cast<uint8_t>(c)] == -1) {
123135
return Status::Invalid("Invalid base64 input: character is not valid base64 character");
124136
}
125137

126-
char_array_4[i++] = c;
138+
char_array_4[i++] = kBase64Lookup[static_cast<uint8_t>(c)];
127139
}
128140

129141
in_++;
130142

131143
if (i == 4) {
132-
for (i = 0; i < 4; i++) {
133-
if (char_array_4[i] != 0) {
134-
char_array_4[i] = base64_chars.find(char_array_4[i]) & 0xff;
135-
}
136-
}
137-
138144
char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
139145
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
140146
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

0 commit comments

Comments
 (0)