Skip to content

Commit 68fe381

Browse files
authored
feat: add simple url encoder & decoder (#457)
1 parent c61344f commit 68fe381

File tree

8 files changed

+225
-0
lines changed

8 files changed

+225
-0
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ set(ICEBERG_SOURCES
9797
util/timepoint.cc
9898
util/truncate_util.cc
9999
util/type_util.cc
100+
util/url_encoder.cc
100101
util/uuid.cc)
101102

102103
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)

src/iceberg/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ iceberg_sources = files(
118118
'util/timepoint.cc',
119119
'util/truncate_util.cc',
120120
'util/type_util.cc',
121+
'util/url_encoder.cc',
121122
'util/uuid.cc',
122123
)
123124

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ add_iceberg_test(util_test
108108
location_util_test.cc
109109
string_util_test.cc
110110
truncate_util_test.cc
111+
url_encoder_test.cc
111112
uuid_test.cc
112113
visit_type_test.cc)
113114

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ iceberg_tests = {
8888
'location_util_test.cc',
8989
'string_util_test.cc',
9090
'truncate_util_test.cc',
91+
'url_encoder_test.cc',
9192
'uuid_test.cc',
9293
'visit_type_test.cc',
9394
),
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/util/url_encoder.h"
21+
22+
#include <gtest/gtest.h>
23+
24+
#include "iceberg/test/matchers.h"
25+
26+
namespace iceberg {
27+
28+
// Checks that Encode leaves unreserved characters alone and percent-encodes
// everything else using uppercase hex digits.
TEST(UrlEncoderTest, Encode) {
  // Letters, digits and "-", "_", ".", "~" pass through untouched.
  EXPECT_EQ(UrlEncoder::Encode("abc123XYZ"), "abc123XYZ");
  EXPECT_EQ(UrlEncoder::Encode("test-file_name.txt~backup"), "test-file_name.txt~backup");

  // Everything else, including space and '%', becomes %XX.
  EXPECT_EQ(UrlEncoder::Encode("hello world"), "hello%20world");
  EXPECT_EQ(UrlEncoder::Encode("test@example.com"), "test%40example.com");
  EXPECT_EQ(UrlEncoder::Encode("path/to/file"), "path%2Fto%2Ffile");
  EXPECT_EQ(UrlEncoder::Encode("key=value&foo=bar"), "key%3Dvalue%26foo%3Dbar");
  EXPECT_EQ(UrlEncoder::Encode("100%"), "100%25");

  // Control characters are encoded too (0x1F is the ASCII unit separator).
  EXPECT_EQ(UrlEncoder::Encode("hello\x1fworld"), "hello%1Fworld");

  // Empty input yields empty output.
  EXPECT_EQ(UrlEncoder::Encode(""), "");
}
45+
46+
// Checks Decode on well-formed escapes (hex digits in either case), on input
// that contains no escapes, and on malformed escapes, which Decode copies
// through verbatim instead of rejecting.
TEST(UrlEncoderTest, Decode) {
  // Decode percent-encoded strings
  EXPECT_THAT(UrlEncoder::Decode("hello%20world"), ::testing::Eq("hello world"));
  EXPECT_THAT(UrlEncoder::Decode("test%40example.com"),
              ::testing::Eq("test@example.com"));
  EXPECT_THAT(UrlEncoder::Decode("path%2fto%2Ffile"), ::testing::Eq("path/to/file"));
  EXPECT_THAT(UrlEncoder::Decode("key%3dvalue%26foo%3Dbar"),
              ::testing::Eq("key=value&foo=bar"));
  EXPECT_THAT(UrlEncoder::Decode("100%25"), ::testing::Eq("100%"));

  // An escape that ends exactly at the end of the input is still decoded
  EXPECT_THAT(UrlEncoder::Decode("abc%41"), ::testing::Eq("abcA"));

  // ASCII Unit Separator (0x1F)
  EXPECT_THAT(UrlEncoder::Decode("hello%1Fworld"), ::testing::Eq("hello\x1Fworld"));

  // Truncated escapes (fewer than two characters after '%') are copied as-is
  EXPECT_THAT(UrlEncoder::Decode("100%"), ::testing::Eq("100%"));
  EXPECT_THAT(UrlEncoder::Decode("%2"), ::testing::Eq("%2"));

  // Escapes containing a non-hex digit are copied as-is
  EXPECT_THAT(UrlEncoder::Decode("%zz"), ::testing::Eq("%zz"));
  EXPECT_THAT(UrlEncoder::Decode("%2Gx"), ::testing::Eq("%2Gx"));

  // '+' is not treated as an encoded space
  EXPECT_THAT(UrlEncoder::Decode("a+b"), ::testing::Eq("a+b"));

  // Unreserved characters remain unchanged
  EXPECT_THAT(UrlEncoder::Decode("test-file_name.txt~backup"),
              ::testing::Eq("test-file_name.txt~backup"));
  EXPECT_THAT(UrlEncoder::Decode(""), ::testing::Eq(""));
}
64+
65+
// Checks that Decode(Encode(x)) == x for inputs mixing reserved, unreserved,
// control and empty content.
TEST(UrlEncoderTest, EncodeDecodeRoundTrip) {
  const std::vector<std::string> inputs = {
      "hello world",
      "test@example.com",
      "path/to/file",
      "key=value&foo=bar",
      "100%",
      "hello\x1Fworld",
      "special!@#$%^&*()chars",
      "mixed-123_test.file~ok",
      "",
  };

  for (const std::string& original : inputs) {
    const std::string round_tripped = UrlEncoder::Decode(UrlEncoder::Encode(original));
    EXPECT_EQ(round_tripped, original) << "Round-trip failed for: " << original;
  }
}
82+
83+
} // namespace iceberg

src/iceberg/util/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ install_headers(
3838
'timepoint.h',
3939
'truncate_util.h',
4040
'type_util.h',
41+
'url_encoder.h',
4142
'uuid.h',
4243
'visitor_generate.h',
4344
'visit_type.h',

src/iceberg/util/url_encoder.cc

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#include "iceberg/util/url_encoder.h"
21+
22+
#include <locale>
23+
24+
namespace iceberg {
25+
26+
namespace {
27+
28+
// Returns true when `c` is one of the characters Encode leaves untouched:
// ASCII letters, ASCII digits, '-', '.', '_' and '~'. The comparisons are
// plain range checks, so the result does not depend on the current locale.
bool IsUnreserved(unsigned char c) {
  const bool is_digit = '0' <= c && c <= '9';
  const bool is_lower = 'a' <= c && c <= 'z';
  const bool is_upper = 'A' <= c && c <= 'Z';
  if (is_digit || is_lower || is_upper) {
    return true;
  }

  switch (c) {
    case '-':
    case '.':
    case '_':
    case '~':
      return true;
    default:
      return false;
  }
}
32+
33+
// Maps one hexadecimal digit, in either case, to its value in [0, 15].
// Any other character yields -1, which callers use as the "not hex" marker.
constexpr int8_t FromHex(char c) {
  if ('0' <= c && c <= '9') {
    return static_cast<int8_t>(c - '0');
  }
  if ('a' <= c && c <= 'f') {
    return static_cast<int8_t>(10 + (c - 'a'));
  }
  if ('A' <= c && c <= 'F') {
    return static_cast<int8_t>(10 + (c - 'A'));
  }
  return -1;
}
40+
41+
} // namespace
42+
43+
std::string UrlEncoder::Encode(std::string_view str_to_encode) {
44+
static const char* kHexChars = "0123456789ABCDEF";
45+
std::string result;
46+
result.reserve(str_to_encode.size() * 3 / 2 /* Heuristic reservation */);
47+
48+
for (char c : str_to_encode) {
49+
if (IsUnreserved(c)) {
50+
result += c;
51+
} else {
52+
result += '%';
53+
result += kHexChars[c >> 4];
54+
result += kHexChars[c & 0xF];
55+
}
56+
}
57+
58+
return result;
59+
}
60+
61+
std::string UrlEncoder::Decode(std::string_view str_to_decode) {
62+
std::string result;
63+
result.reserve(str_to_decode.size());
64+
65+
for (size_t i = 0; i < str_to_decode.size(); ++i) {
66+
char c = str_to_decode[i];
67+
if (c == '%' && i + 2 < str_to_decode.size()) {
68+
int8_t hi = FromHex(str_to_decode[i + 1]);
69+
int8_t lo = FromHex(str_to_decode[i + 2]);
70+
71+
if (hi != -1 && lo != -1) {
72+
result += static_cast<char>((hi << 4) | lo);
73+
i += 2;
74+
continue;
75+
}
76+
}
77+
// Not a valid %XX sequence, copy as-is
78+
result += c;
79+
}
80+
81+
return result;
82+
}
83+
84+
} // namespace iceberg

src/iceberg/util/url_encoder.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
#pragma once
21+
22+
#include <string>
23+
#include <string_view>
24+
25+
#include "iceberg/iceberg_export.h"
26+
27+
/// \file iceberg/util/url_encoder.h
28+
/// \brief URL encoding and decoding.
29+
30+
namespace iceberg {
31+
32+
/// \brief Percent-encoding and percent-decoding helpers for URL components.
///
/// Both functions are static; the class only groups them under one name.
33+
class ICEBERG_EXPORT UrlEncoder {
34+
public:
35+
/// \brief Percent-encode a string.
36+
///
37+
/// \details A simple byte-wise encoder:
38+
/// - Unreserved characters are copied unchanged: [A-Z], [a-z], [0-9], "-", "_", ".", "~"
39+
/// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+").
40+
/// - All other characters are percent-encoded (%XX) using uppercase hex digits.
41+
/// \param str_to_encode The string to encode.
42+
/// \return The URL-encoded string.
43+
static std::string Encode(std::string_view str_to_encode);
44+
45+
/// \brief Percent-decode a string.
46+
///
47+
/// \details Replaces each "%XX" escape with the byte it names (e.g., "%20" -> space);
/// hex digits are accepted in upper or lower case. A '%' that is not followed by
/// two hex digits is copied to the output unchanged rather than reported as an
/// error, and "+" is left as "+" (it is not converted to a space).
48+
/// \param str_to_decode The encoded string to decode.
49+
/// \return The decoded string.
50+
static std::string Decode(std::string_view str_to_decode);
51+
};
52+
53+
} // namespace iceberg

0 commit comments

Comments
 (0)