Skip to content

Commit bbdfc05

Browse files
committed
refactor: move url encoder from rest catlog to iceberg/util for common use
1 parent a89924d commit bbdfc05

File tree

13 files changed

+231
-122
lines changed

13 files changed

+231
-122
lines changed

src/iceberg/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ set(ICEBERG_SOURCES
9494
util/timepoint.cc
9595
util/truncate_util.cc
9696
util/type_util.cc
97+
util/url_encoder.cc
9798
util/uuid.cc)
9899

99100
set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
@@ -103,26 +104,30 @@ set(ICEBERG_SHARED_INSTALL_INTERFACE_LIBS)
103104

104105
list(APPEND
105106
ICEBERG_STATIC_BUILD_INTERFACE_LIBS
107+
cpr::cpr
106108
nanoarrow::nanoarrow_static
107109
nlohmann_json::nlohmann_json
108110
roaring::roaring
109111
spdlog::spdlog
110112
ZLIB::ZLIB)
111113
list(APPEND
112114
ICEBERG_SHARED_BUILD_INTERFACE_LIBS
115+
cpr::cpr
113116
nanoarrow::nanoarrow_shared
114117
nlohmann_json::nlohmann_json
115118
roaring::roaring
116119
spdlog::spdlog
117120
ZLIB::ZLIB)
118121
list(APPEND
119122
ICEBERG_STATIC_INSTALL_INTERFACE_LIBS
123+
"$<IF:$<BOOL:${CPR_VENDORED}>,iceberg::cpr,cpr::cpr>"
120124
"$<IF:$<BOOL:${NANOARROW_VENDORED}>,iceberg::nanoarrow_static,$<IF:$<TARGET_EXISTS:nanoarrow::nanoarrow_static>,nanoarrow::nanoarrow_static,nanoarrow::nanoarrow_shared>>"
121125
"$<IF:$<BOOL:${NLOHMANN_JSON_VENDORED}>,iceberg::nlohmann_json,$<IF:$<TARGET_EXISTS:nlohmann_json::nlohmann_json>,nlohmann_json::nlohmann_json,nlohmann_json::nlohmann_json>>"
122126
"$<IF:$<BOOL:${CROARING_VENDORED}>,iceberg::roaring,roaring::roaring>"
123127
"$<IF:$<BOOL:${SPDLOG_VENDORED}>,iceberg::spdlog,spdlog::spdlog>")
124128
list(APPEND
125129
ICEBERG_SHARED_INSTALL_INTERFACE_LIBS
130+
"$<IF:$<BOOL:${CPR_VENDORED}>,iceberg::cpr,cpr::cpr>"
126131
"$<IF:$<BOOL:${NANOARROW_VENDORED}>,iceberg::nanoarrow_shared,$<IF:$<TARGET_EXISTS:nanoarrow::nanoarrow_shared>,nanoarrow::nanoarrow_shared,nanoarrow::nanoarrow_static>>"
127132
"$<IF:$<BOOL:${NLOHMANN_JSON_VENDORED}>,iceberg::nlohmann_json,$<IF:$<TARGET_EXISTS:nlohmann_json::nlohmann_json>,nlohmann_json::nlohmann_json,nlohmann_json::nlohmann_json>>"
128133
"$<IF:$<BOOL:${CROARING_VENDORED}>,iceberg::roaring,roaring::roaring>"

src/iceberg/catalog/rest/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,17 @@ set(ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS)
3232
set(ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS)
3333

3434
list(APPEND ICEBERG_REST_STATIC_BUILD_INTERFACE_LIBS
35-
"$<IF:$<TARGET_EXISTS:iceberg_static>,iceberg_static,iceberg_shared>" cpr::cpr)
35+
"$<IF:$<TARGET_EXISTS:iceberg_static>,iceberg_static,iceberg_shared>")
3636
list(APPEND ICEBERG_REST_SHARED_BUILD_INTERFACE_LIBS
37-
"$<IF:$<TARGET_EXISTS:iceberg_shared>,iceberg_shared,iceberg_static>" cpr::cpr)
37+
"$<IF:$<TARGET_EXISTS:iceberg_shared>,iceberg_shared,iceberg_static>")
3838
list(APPEND
3939
ICEBERG_REST_STATIC_INSTALL_INTERFACE_LIBS
4040
"$<IF:$<TARGET_EXISTS:iceberg::iceberg_static>,iceberg::iceberg_static,iceberg::iceberg_shared>"
41-
"$<IF:$<BOOL:${CPR_VENDORED}>,iceberg::cpr,cpr::cpr>")
41+
)
4242
list(APPEND
4343
ICEBERG_REST_SHARED_INSTALL_INTERFACE_LIBS
4444
"$<IF:$<TARGET_EXISTS:iceberg::iceberg_shared>,iceberg::iceberg_shared,iceberg::iceberg_static>"
45-
"$<IF:$<BOOL:${CPR_VENDORED}>,iceberg::cpr,cpr::cpr>")
45+
)
4646

4747
add_iceberg_lib(iceberg_rest
4848
SOURCES

src/iceberg/catalog/rest/meson.build

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,8 @@ iceberg_rest_sources = files(
2626
'rest_util.cc',
2727
'types.cc',
2828
)
29-
# cpr does not export symbols, so on Windows it must
30-
# be used as a static lib
31-
cpr_needs_static = (
32-
get_option('default_library') == 'static' or
33-
host_machine.system() == 'windows'
34-
)
35-
cpr_dep = dependency('cpr', static: cpr_needs_static)
3629

37-
iceberg_rest_build_deps = [iceberg_dep, cpr_dep]
30+
iceberg_rest_build_deps = [iceberg_dep]
3831
iceberg_rest_lib = library(
3932
'iceberg_rest',
4033
sources: iceberg_rest_sources,

src/iceberg/catalog/rest/resource_paths.cc

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "iceberg/catalog/rest/rest_util.h"
2525
#include "iceberg/table_identifier.h"
2626
#include "iceberg/util/macros.h"
27+
#include "iceberg/util/url_encoder.h"
2728

2829
namespace iceberg::rest {
2930

@@ -77,7 +78,7 @@ Result<std::string> ResourcePaths::Tables(const Namespace& ns) const {
7778

7879
Result<std::string> ResourcePaths::Table(const TableIdentifier& ident) const {
7980
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_namespace, EncodeNamespace(ident.ns));
80-
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_table_name, EncodeString(ident.name));
81+
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_table_name, UrlEncoder::Encode(ident.name));
8182
return std::format("{}/v1/{}namespaces/{}/tables/{}", base_uri_, prefix_,
8283
encoded_namespace, encoded_table_name);
8384
}
@@ -94,14 +95,14 @@ Result<std::string> ResourcePaths::Rename() const {
9495

9596
Result<std::string> ResourcePaths::Metrics(const TableIdentifier& ident) const {
9697
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_namespace, EncodeNamespace(ident.ns));
97-
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_table_name, EncodeString(ident.name));
98+
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_table_name, UrlEncoder::Encode(ident.name));
9899
return std::format("{}/v1/{}namespaces/{}/tables/{}/metrics", base_uri_, prefix_,
99100
encoded_namespace, encoded_table_name);
100101
}
101102

102103
Result<std::string> ResourcePaths::Credentials(const TableIdentifier& ident) const {
103104
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_namespace, EncodeNamespace(ident.ns));
104-
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_table_name, EncodeString(ident.name));
105+
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_table_name, UrlEncoder::Encode(ident.name));
105106
return std::format("{}/v1/{}namespaces/{}/tables/{}/credentials", base_uri_, prefix_,
106107
encoded_namespace, encoded_table_name);
107108
}

src/iceberg/catalog/rest/rest_util.cc

Lines changed: 6 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@
2222
#include <format>
2323
#include <unordered_set>
2424

25-
#include <cpr/util.h>
26-
2725
#include "iceberg/catalog/rest/endpoint.h"
2826
#include "iceberg/table_identifier.h"
2927
#include "iceberg/util/macros.h"
28+
#include "iceberg/util/url_encoder.h"
3029

3130
namespace iceberg::rest {
3231

@@ -41,44 +40,17 @@ std::string_view TrimTrailingSlash(std::string_view str) {
4140
return str;
4241
}
4342

44-
Result<std::string> EncodeString(std::string_view str_to_encode) {
45-
if (str_to_encode.empty()) {
46-
return "";
47-
}
48-
49-
// Use CPR's urlEncode which internally calls libcurl's curl_easy_escape()
50-
cpr::util::SecureString encoded = cpr::util::urlEncode(str_to_encode);
51-
if (encoded.empty()) {
52-
return InvalidArgument("Failed to encode string '{}'", str_to_encode);
53-
}
54-
55-
return std::string{encoded.data(), encoded.size()};
56-
}
57-
58-
Result<std::string> DecodeString(std::string_view str_to_decode) {
59-
if (str_to_decode.empty()) {
60-
return "";
61-
}
62-
63-
// Use CPR's urlDecode which internally calls libcurl's curl_easy_unescape()
64-
cpr::util::SecureString decoded = cpr::util::urlDecode(str_to_decode);
65-
if (decoded.empty()) {
66-
return InvalidArgument("Failed to decode string '{}'", str_to_decode);
67-
}
68-
69-
return std::string{decoded.data(), decoded.size()};
70-
}
71-
7243
Result<std::string> EncodeNamespace(const Namespace& ns_to_encode) {
7344
if (ns_to_encode.levels.empty()) {
7445
return "";
7546
}
7647

77-
ICEBERG_ASSIGN_OR_RAISE(std::string result, EncodeString(ns_to_encode.levels.front()));
48+
ICEBERG_ASSIGN_OR_RAISE(std::string result,
49+
UrlEncoder::Encode(ns_to_encode.levels.front()));
7850

7951
for (size_t i = 1; i < ns_to_encode.levels.size(); ++i) {
8052
ICEBERG_ASSIGN_OR_RAISE(std::string encoded_level,
81-
EncodeString(ns_to_encode.levels[i]));
53+
UrlEncoder::Encode(ns_to_encode.levels[i]));
8254
result.append(kNamespaceEscapeSeparator);
8355
result.append(std::move(encoded_level));
8456
}
@@ -97,14 +69,14 @@ Result<Namespace> DecodeNamespace(std::string_view str_to_decode) {
9769

9870
while (end != std::string::npos) {
9971
ICEBERG_ASSIGN_OR_RAISE(std::string decoded_level,
100-
DecodeString(str_to_decode.substr(start, end - start)));
72+
UrlEncoder::Decode(str_to_decode.substr(start, end - start)));
10173
ns.levels.push_back(std::move(decoded_level));
10274
start = end + kNamespaceEscapeSeparator.size();
10375
end = str_to_decode.find(kNamespaceEscapeSeparator, start);
10476
}
10577

10678
ICEBERG_ASSIGN_OR_RAISE(std::string decoded_level,
107-
DecodeString(str_to_decode.substr(start)));
79+
UrlEncoder::Decode(str_to_decode.substr(start)));
10880
ns.levels.push_back(std::move(decoded_level));
10981
return ns;
11082
}

src/iceberg/catalog/rest/rest_util.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,6 @@ namespace iceberg::rest {
3838
/// \return The trimmed string with all trailing slashes removed.
3939
ICEBERG_REST_EXPORT std::string_view TrimTrailingSlash(std::string_view str);
4040

41-
/// \brief URL-encode a string (RFC 3986).
42-
///
43-
/// \details This implementation uses libcurl (via CPR), which follows RFC 3986 strictly:
44-
/// - Unreserved characters: [A-Z], [a-z], [0-9], "-", "_", ".", "~"
45-
/// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+").
46-
/// - All other characters are percent-encoded (%XX).
47-
/// \param str_to_encode The string to encode.
48-
/// \return The URL-encoded string or InvalidArgument if the string is invalid.
49-
ICEBERG_REST_EXPORT Result<std::string> EncodeString(std::string_view str_to_encode);
50-
51-
/// \brief URL-decode a string.
52-
///
53-
/// \details Decodes percent-encoded characters (e.g., "%20" -> space). Uses libcurl's URL
54-
/// decoding via the CPR library.
55-
/// \param str_to_decode The encoded string to decode.
56-
/// \return The decoded string or InvalidArgument if the string is invalid.
57-
ICEBERG_REST_EXPORT Result<std::string> DecodeString(std::string_view str_to_decode);
58-
5941
/// \brief Encode a Namespace into a URL-safe component.
6042
///
6143
/// \details Encodes each level separately using EncodeString, then joins them with "%1F".

src/iceberg/meson.build

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,18 @@ iceberg_sources = files(
116116
'util/timepoint.cc',
117117
'util/truncate_util.cc',
118118
'util/type_util.cc',
119+
'util/url_encoder.cc',
119120
'util/uuid.cc',
120121
)
121122

123+
# cpr does not export symbols, so on Windows it must
124+
# be used as a static lib
125+
cpr_needs_static = (
126+
get_option('default_library') == 'static' or
127+
host_machine.system() == 'windows'
128+
)
129+
cpr_dep = dependency('cpr', static: cpr_needs_static)
130+
122131
# CRoaring does not export symbols, so on Windows it must
123132
# be used as a static lib
124133
croaring_needs_static = (
@@ -132,6 +141,7 @@ spdlog_dep = dependency('spdlog')
132141
zlib_dep = dependency('zlib')
133142

134143
iceberg_deps = [
144+
cpr_dep,
135145
croaring_dep,
136146
nanoarrow_dep,
137147
nlohmann_json_dep,

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ add_iceberg_test(util_test
108108
location_util_test.cc
109109
string_util_test.cc
110110
truncate_util_test.cc
111+
url_encoder_test.cc
111112
uuid_test.cc
112113
visit_type_test.cc)
113114

src/iceberg/test/rest_util_test.cc

Lines changed: 0 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -71,61 +71,6 @@ TEST(RestUtilTest, RoundTripUrlEncodeDecodeNamespace) {
7171
EXPECT_THAT(DecodeNamespace(""), HasValue(::testing::Eq(Namespace{.levels = {}})));
7272
}
7373

74-
TEST(RestUtilTest, EncodeString) {
75-
// RFC 3986 unreserved characters should not be encoded
76-
EXPECT_THAT(EncodeString("abc123XYZ"), HasValue(::testing::Eq("abc123XYZ")));
77-
EXPECT_THAT(EncodeString("test-file_name.txt~backup"),
78-
HasValue(::testing::Eq("test-file_name.txt~backup")));
79-
80-
// Spaces and special characters should be encoded
81-
EXPECT_THAT(EncodeString("hello world"), HasValue(::testing::Eq("hello%20world")));
82-
EXPECT_THAT(EncodeString("test@example.com"),
83-
HasValue(::testing::Eq("test%40example.com")));
84-
EXPECT_THAT(EncodeString("path/to/file"), HasValue(::testing::Eq("path%2Fto%2Ffile")));
85-
EXPECT_THAT(EncodeString("key=value&foo=bar"),
86-
HasValue(::testing::Eq("key%3Dvalue%26foo%3Dbar")));
87-
EXPECT_THAT(EncodeString("100%"), HasValue(::testing::Eq("100%25")));
88-
EXPECT_THAT(EncodeString("hello\x1Fworld"), HasValue(::testing::Eq("hello%1Fworld")));
89-
EXPECT_THAT(EncodeString(""), HasValue(::testing::Eq("")));
90-
}
91-
92-
TEST(RestUtilTest, DecodeString) {
93-
// Decode percent-encoded strings
94-
EXPECT_THAT(DecodeString("hello%20world"), HasValue(::testing::Eq("hello world")));
95-
EXPECT_THAT(DecodeString("test%40example.com"),
96-
HasValue(::testing::Eq("test@example.com")));
97-
EXPECT_THAT(DecodeString("path%2Fto%2Ffile"), HasValue(::testing::Eq("path/to/file")));
98-
EXPECT_THAT(DecodeString("key%3Dvalue%26foo%3Dbar"),
99-
HasValue(::testing::Eq("key=value&foo=bar")));
100-
EXPECT_THAT(DecodeString("100%25"), HasValue(::testing::Eq("100%")));
101-
102-
// ASCII Unit Separator (0x1F)
103-
EXPECT_THAT(DecodeString("hello%1Fworld"), HasValue(::testing::Eq("hello\x1Fworld")));
104-
105-
// Unreserved characters remain unchanged
106-
EXPECT_THAT(DecodeString("test-file_name.txt~backup"),
107-
HasValue(::testing::Eq("test-file_name.txt~backup")));
108-
EXPECT_THAT(DecodeString(""), HasValue(::testing::Eq("")));
109-
}
110-
111-
TEST(RestUtilTest, EncodeDecodeStringRoundTrip) {
112-
std::vector<std::string> test_cases = {"hello world",
113-
"test@example.com",
114-
"path/to/file",
115-
"key=value&foo=bar",
116-
"100%",
117-
"hello\x1Fworld",
118-
"special!@#$%^&*()chars",
119-
"mixed-123_test.file~ok",
120-
""};
121-
122-
for (const auto& test : test_cases) {
123-
ICEBERG_UNWRAP_OR_FAIL(std::string encoded, EncodeString(test));
124-
ICEBERG_UNWRAP_OR_FAIL(std::string decoded, DecodeString(encoded));
125-
EXPECT_EQ(decoded, test) << "Round-trip failed for: " << test;
126-
}
127-
}
128-
12974
TEST(RestUtilTest, MergeConfigs) {
13075
std::unordered_map<std::string, std::string> server_defaults = {
13176
{"default1", "value1"}, {"default2", "value2"}, {"common", "default_value"}};

0 commit comments

Comments
 (0)