Skip to content

Commit 4ac1fa1

Browse files
authored
feat: impl PartitionSpec::PartitionPath (#500)
1 parent 40834dd commit 4ac1fa1

File tree

3 files changed

+74
-0
lines changed

3 files changed

+74
-0
lines changed

src/iceberg/partition_spec.cc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,19 @@
2626
#include <map>
2727
#include <memory>
2828
#include <ranges>
29+
#include <sstream>
2930
#include <unordered_map>
3031
#include <utility>
3132

3233
#include "iceberg/result.h"
34+
#include "iceberg/row/partition_values.h"
3335
#include "iceberg/schema.h"
3436
#include "iceberg/schema_field.h"
3537
#include "iceberg/transform.h"
3638
#include "iceberg/util/formatter.h" // IWYU pragma: keep
3739
#include "iceberg/util/macros.h"
3840
#include "iceberg/util/type_util.h"
41+
#include "iceberg/util/url_encoder.h"
3942

4043
namespace iceberg {
4144

@@ -98,6 +101,25 @@ Result<std::unique_ptr<StructType>> PartitionSpec::PartitionType(
98101
return std::make_unique<StructType>(std::move(partition_fields));
99102
}
100103

104+
// Renders the partition path for `data` as `name=value` segments joined by "/",
// one segment per partition field, in the order the fields appear in this spec.
// Both the field name and the string form of the value are passed through
// UrlEncoder::Encode before being appended.
//
// Fails the precheck when the number of values in `data` differs from the number
// of fields in this spec. An error returned by PartitionValues::ValueAt is
// propagated to the caller.
Result<std::string> PartitionSpec::PartitionPath(const PartitionValues& data) const {
  ICEBERG_PRECHECK(fields_.size() == data.num_fields(),
                   "Partition spec and data mismatch, expected field num {}, got {}",
                   fields_.size(), data.num_fields());
  std::string path;
  // Index with size_t so the loop condition does not compare a signed counter
  // against the result of size().
  for (size_t i = 0; i < fields_.size(); ++i) {
    ICEBERG_ASSIGN_OR_RAISE(auto value, data.ValueAt(i));
    if (i > 0) {
      path += '/';
    }
    // TODO(zhuo.wang): transform for partition value, will be fixed after transform util
    // is ready
    path += UrlEncoder::Encode(fields_[i].name());
    path += '=';
    path += UrlEncoder::Encode(value.get().ToString());
  }
  return path;
}
122+
101123
bool PartitionSpec::CompatibleWith(const PartitionSpec& other) const {
102124
if (Equals(other)) {
103125
return true;

src/iceberg/partition_spec.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ class ICEBERG_EXPORT PartitionSpec : public util::Formattable {
6464
/// \brief Get the partition type binding to the input schema.
6565
Result<std::unique_ptr<StructType>> PartitionType(const Schema& schema) const;
6666

67+
/// \brief Get the partition path for the given partition data.
///
/// The path consists of one `name=value` segment per partition field, joined by
/// "/". Field names and values are passed through UrlEncoder::Encode.
///
/// \param data Partition values; must hold exactly one value per field of this spec.
/// \return The partition path, or an error when the number of values does not match
/// the number of partition fields.
68+
Result<std::string> PartitionPath(const PartitionValues& data) const;
69+
6770
/// \brief Returns true if this spec is equivalent to the other, with partition field
6871
/// ids ignored. That is, if both specs have the same number of fields, field order,
6972
/// field name, source columns, and transforms.

src/iceberg/test/partition_spec_test.cc

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828

2929
#include "iceberg/json_internal.h"
3030
#include "iceberg/partition_field.h"
31+
#include "iceberg/row/partition_values.h"
3132
#include "iceberg/schema.h"
3233
#include "iceberg/schema_field.h"
3334
#include "iceberg/test/matchers.h"
@@ -425,4 +426,52 @@ TEST(PartitionSpecTest, ValidateRedundantPartitionsIdentityTransforms) {
425426
}
426427
}
427428

429+
// Exercises PartitionSpec::PartitionPath: a value-count mismatch must be rejected,
// and matching values must be rendered as URL-encoded `name=value` segments.
TEST(PartitionSpecTest, PartitionPath) {
  // Three required source columns of different types.
  auto id_column = SchemaField::MakeRequired(1, "id", int64());
  auto name_column = SchemaField::MakeRequired(2, "name", string());
  auto ts_column = SchemaField::MakeRequired(3, "ts", timestamp());
  Schema schema({id_column, name_column, ts_column}, Schema::kInitialSchemaId);

  // One partition field per source column.
  PartitionField by_id(1, 1000, "id_partition", Transform::Identity());
  PartitionField by_name(2, 1001, "name_partition", Transform::Identity());
  PartitionField by_day(3, 1002, "ts_partition", Transform::Day());

  // Spec under test.
  ICEBERG_UNWRAP_OR_FAIL(
      auto spec, PartitionSpec::Make(schema, 1, {by_id, by_name, by_day}, false));

  // Case 1: fewer values than partition fields is an invalid argument.
  {
    PartitionValues too_few({Literal::Int(123)});
    auto outcome = spec->PartitionPath(too_few);
    EXPECT_THAT(outcome, IsError(ErrorKind::kInvalidArgument));
    EXPECT_THAT(outcome, HasErrorMessage("Partition spec and data mismatch"));
  }

  // Case 2: plain values.
  {
    PartitionValues plain_values(
        {Literal::Int(123), Literal::String("val2"), Literal::Date(19489)});
    ICEBERG_UNWRAP_OR_FAIL(auto actual, spec->PartitionPath(plain_values));
    const std::string want =
        "id_partition=123/name_partition=%22val2%22/ts_partition=19489";
    EXPECT_EQ(want, actual);
  }

  // Case 3: a value containing a character that needs escaping.
  {
    PartitionValues special_values(
        {Literal::Int(123), Literal::String("val#2"), Literal::Date(19489)});
    ICEBERG_UNWRAP_OR_FAIL(auto actual, spec->PartitionPath(special_values));
    const std::string want =
        "id_partition=123/name_partition=%22val%232%22/ts_partition=19489";
    EXPECT_EQ(want, actual);
  }
}
476+
428477
} // namespace iceberg

0 commit comments

Comments
 (0)