Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/iceberg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ set(ICEBERG_SOURCES
util/timepoint.cc
util/truncate_util.cc
util/type_util.cc
util/url_encoder.cc
util/uuid.cc)

set(ICEBERG_STATIC_BUILD_INTERFACE_LIBS)
Expand Down
1 change: 1 addition & 0 deletions src/iceberg/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ iceberg_sources = files(
'util/timepoint.cc',
'util/truncate_util.cc',
'util/type_util.cc',
'util/url_encoder.cc',
'util/uuid.cc',
)

Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ add_iceberg_test(util_test
location_util_test.cc
string_util_test.cc
truncate_util_test.cc
url_encoder_test.cc
uuid_test.cc
visit_type_test.cc)

Expand Down
1 change: 1 addition & 0 deletions src/iceberg/test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ iceberg_tests = {
'location_util_test.cc',
'string_util_test.cc',
'truncate_util_test.cc',
'url_encoder_test.cc',
'uuid_test.cc',
'visit_type_test.cc',
),
Expand Down
83 changes: 83 additions & 0 deletions src/iceberg/test/url_encoder_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/util/url_encoder.h"

#include <gtest/gtest.h>

#include "iceberg/test/matchers.h"

namespace iceberg {

// Verifies UrlEncoder::Encode against a table of (input, expected) pairs.
TEST(UrlEncoderTest, Encode) {
  struct EncodeCase {
    const char* input;
    const char* expected;
  };

  const EncodeCase kCases[] = {
      // RFC 3986 unreserved characters pass through untouched.
      {"abc123XYZ", "abc123XYZ"},
      {"test-file_name.txt~backup", "test-file_name.txt~backup"},
      // Spaces and other special characters are percent-encoded.
      {"hello world", "hello%20world"},
      {"test@example.com", "test%40example.com"},
      {"path/to/file", "path%2Fto%2Ffile"},
      {"key=value&foo=bar", "key%3Dvalue%26foo%3Dbar"},
      {"100%", "100%25"},
      // ASCII Unit Separator (0x1F) is emitted with uppercase hex digits.
      {"hello\x1fworld", "hello%1Fworld"},
      // Empty input yields empty output.
      {"", ""},
  };

  for (const EncodeCase& tc : kCases) {
    EXPECT_THAT(UrlEncoder::Encode(tc.input), ::testing::Eq(tc.expected));
  }
}

// Verifies UrlEncoder::Decode against a table of (encoded, expected) pairs.
TEST(UrlEncoderTest, Decode) {
  struct DecodeCase {
    const char* encoded;
    const char* expected;
  };

  const DecodeCase kCases[] = {
      // Percent-encoded sequences, with both upper- and lower-case hex digits.
      {"hello%20world", "hello world"},
      {"test%40example.com", "test@example.com"},
      {"path%2fto%2Ffile", "path/to/file"},
      {"key%3dvalue%26foo%3Dbar", "key=value&foo=bar"},
      {"100%25", "100%"},
      // ASCII Unit Separator (0x1F).
      {"hello%1Fworld", "hello\x1Fworld"},
      // Input without any escapes is returned unchanged.
      {"test-file_name.txt~backup", "test-file_name.txt~backup"},
      {"", ""},
  };

  for (const DecodeCase& tc : kCases) {
    EXPECT_THAT(UrlEncoder::Decode(tc.encoded), ::testing::Eq(tc.expected));
  }
}

// Verifies that Decode(Encode(x)) == x for a set of representative inputs.
TEST(UrlEncoderTest, EncodeDecodeRoundTrip) {
  const std::vector<std::string> inputs = {
      "hello world",
      "test@example.com",
      "path/to/file",
      "key=value&foo=bar",
      "100%",
      "hello\x1Fworld",
      "special!@#$%^&*()chars",
      "mixed-123_test.file~ok",
      "",
  };

  for (const std::string& original : inputs) {
    // Encode, then immediately decode; the result must equal the input.
    const std::string round_tripped = UrlEncoder::Decode(UrlEncoder::Encode(original));
    EXPECT_EQ(round_tripped, original) << "Round-trip failed for: " << original;
  }
}

} // namespace iceberg
1 change: 1 addition & 0 deletions src/iceberg/util/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ install_headers(
'timepoint.h',
'truncate_util.h',
'type_util.h',
'url_encoder.h',
'uuid.h',
'visitor_generate.h',
'visit_type.h',
Expand Down
84 changes: 84 additions & 0 deletions src/iceberg/util/url_encoder.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "iceberg/util/url_encoder.h"

#include <locale>

namespace iceberg {

namespace {

/// \brief Tell whether a byte is one of the characters Encode leaves untouched.
///
/// The accepted set is ASCII digits, ASCII letters (either case) and the four
/// marks '-', '.', '_' and '~'.
///
/// \param c The byte to classify.
/// \return true when `c` is in the accepted set, false otherwise.
bool IsUnreserved(unsigned char c) {
  // Punctuation marks first: a direct match needs no range checks.
  switch (c) {
    case '-':
    case '.':
    case '_':
    case '~':
      return true;
    default:
      break;
  }

  const bool is_digit = c >= '0' && c <= '9';
  const bool is_lower = c >= 'a' && c <= 'z';
  const bool is_upper = c >= 'A' && c <= 'Z';
  return is_digit || is_lower || is_upper;
}

/// \brief Map a single hexadecimal digit to its numeric value.
///
/// \param c Candidate digit; 'A'-'F' and 'a'-'f' are both accepted.
/// \return A value in [0, 15], or -1 when `c` is not a hexadecimal digit.
constexpr int8_t FromHex(char c) {
  const bool is_decimal = c >= '0' && c <= '9';
  const bool is_upper = c >= 'A' && c <= 'F';
  const bool is_lower = c >= 'a' && c <= 'f';

  return is_decimal ? static_cast<int8_t>(c - '0')
         : is_upper ? static_cast<int8_t>(c - 'A' + 10)
         : is_lower ? static_cast<int8_t>(c - 'a' + 10)
                    : static_cast<int8_t>(-1);
}

} // namespace

/// \brief Percent-encode every byte of the input that is not unreserved.
///
/// Bytes accepted by IsUnreserved() are copied through unchanged. Every other
/// byte, including bytes >= 0x80 such as those of UTF-8 multi-byte sequences,
/// is written as "%XX" with two uppercase hexadecimal digits.
///
/// \param str_to_encode The raw bytes to encode.
/// \return The percent-encoded string.
std::string UrlEncoder::Encode(std::string_view str_to_encode) {
  static constexpr char kHexDigits[] = "0123456789ABCDEF";

  std::string result;
  result.reserve(str_to_encode.size() * 3 / 2 /* Heuristic reservation */);

  for (const char raw : str_to_encode) {
    // Work on the byte as unsigned. Where plain `char` is signed, bytes >= 0x80
    // are negative, so shifting the raw `char` would produce a negative index
    // and read outside the hex digit table.
    const auto byte = static_cast<unsigned char>(raw);
    if (IsUnreserved(byte)) {
      result += raw;
    } else {
      result += '%';
      result += kHexDigits[byte >> 4];   // high nibble, always in [0, 15]
      result += kHexDigits[byte & 0xF];  // low nibble
    }
  }

  return result;
}
Comment thread
wgtmac marked this conversation as resolved.

/// \brief Reverse percent-encoding.
///
/// A '%' followed by two hexadecimal digits (either case) is replaced by the
/// byte those digits denote. Every other character, including a '%' that is
/// not followed by two hexadecimal digits, is copied to the output unchanged.
///
/// \param str_to_decode The encoded string to decode.
/// \return The decoded string.
std::string UrlEncoder::Decode(std::string_view str_to_decode) {
  const size_t length = str_to_decode.size();

  std::string result;
  result.reserve(length);

  size_t pos = 0;
  while (pos < length) {
    const char current = str_to_decode[pos];

    // An escape needs two more characters after the '%'.
    if (current == '%' && pos + 2 < length) {
      const int8_t high = FromHex(str_to_decode[pos + 1]);
      const int8_t low = FromHex(str_to_decode[pos + 2]);

      if (high != -1 && low != -1) {
        result += static_cast<char>((high << 4) | low);
        pos += 3;  // consume '%' and both hex digits
        continue;
      }
    }

    // Not a valid %XX sequence: keep the character verbatim.
    result += current;
    ++pos;
  }

  return result;
}

} // namespace iceberg
53 changes: 53 additions & 0 deletions src/iceberg/util/url_encoder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#pragma once

#include <string>
#include <string_view>

#include "iceberg/iceberg_export.h"

/// \file iceberg/util/url_encoder.h
/// \brief URL encoding and decoding.

namespace iceberg {

/// \brief Utilities for percent-encoding and percent-decoding strings used in URLs.
///
/// Both operations are exposed as static member functions.
class ICEBERG_EXPORT UrlEncoder {
public:
/// \brief URL-encode a string.
///
/// \details This is a simple implementation of url-encode
/// - Unreserved characters: [A-Z], [a-z], [0-9], "-", "_", ".", "~"
///   are copied to the output unchanged.
/// - Space is encoded as "%20" (unlike Java's URLEncoder which uses "+").
/// - All other characters are percent-encoded (%XX) using uppercase
///   hexadecimal digits.
/// \param str_to_encode The string to encode.
/// \return The URL-encoded string.
static std::string Encode(std::string_view str_to_encode);

/// \brief URL-decode a string.
///
/// \details Decodes percent-encoded characters (e.g., "%20" -> space).
/// - Hexadecimal digits are accepted in either case ("%2f" and "%2F").
/// - A "%" that is not followed by two hexadecimal digits is copied to the
///   output as-is rather than reported as an error.
/// - "+" is not converted to a space; only "%XX" sequences are decoded.
/// \param str_to_decode The encoded string to decode.
/// \return The decoded string.
static std::string Decode(std::string_view str_to_decode);
};

} // namespace iceberg
Loading