Skip to content

Commit 8416de6

Browse files
committed
feat: add SortOrderBuilder
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
1 parent 1c431b6 commit 8416de6

11 files changed

Lines changed: 452 additions & 4 deletions

src/iceberg/result.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ enum class ErrorKind {
4848
kNotFound,
4949
kNotImplemented,
5050
kNotSupported,
51+
kValidationError,
5152
kUnknownError,
5253
};
5354

@@ -97,6 +98,7 @@ DEFINE_ERROR_FUNCTION(NotAllowed)
9798
DEFINE_ERROR_FUNCTION(NotFound)
9899
DEFINE_ERROR_FUNCTION(NotImplemented)
99100
DEFINE_ERROR_FUNCTION(NotSupported)
101+
DEFINE_ERROR_FUNCTION(ValidationError)
100102
DEFINE_ERROR_FUNCTION(UnknownError)
101103

102104
#undef DEFINE_ERROR_FUNCTION

src/iceberg/sort_order.cc

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,19 @@
2020
#include "iceberg/sort_order.h"
2121

2222
#include <format>
23+
#include <memory>
24+
#include <optional>
2325
#include <ranges>
2426

27+
#include "iceberg/exception.h"
28+
#include "iceberg/expression/term.h"
29+
#include "iceberg/result.h"
30+
#include "iceberg/schema.h"
31+
#include "iceberg/sort_field.h"
32+
#include "iceberg/transform.h"
33+
#include "iceberg/util/checked_cast.h"
2534
#include "iceberg/util/formatter.h" // IWYU pragma: keep
35+
#include "iceberg/util/macros.h"
2636

2737
namespace iceberg {
2838

@@ -31,7 +41,7 @@ SortOrder::SortOrder(int32_t order_id, std::vector<SortField> fields)
3141

3242
const std::shared_ptr<SortOrder>& SortOrder::Unsorted() {
3343
static const std::shared_ptr<SortOrder> unsorted =
34-
std::make_shared<SortOrder>(/*order_id=*/0, std::vector<SortField>{});
44+
std::make_shared<SortOrder>(kUnsortedOrderId, std::vector<SortField>{});
3545
return unsorted;
3646
}
3747

@@ -80,4 +90,110 @@ bool SortOrder::Equals(const SortOrder& other) const {
8090
return order_id_ == other.order_id_ && fields_ == other.fields_;
8191
}
8292

93+
// SortOrderBuilder implementation
94+
95+
/// \brief Private state of SortOrderBuilder, kept out of the public header.
struct SortOrderBuilder::Impl {
  /// Schema that terms are bound against and that Build() validates against.
  /// Held by value (shared ownership): a reference member would dangle as soon
  /// as the caller's shared_ptr -- possibly a temporary -- is destroyed.
  std::shared_ptr<Schema> schema;
  /// Order ID set through WithOrderId(); empty until it is called.
  std::optional<int32_t> sort_id;
  /// Sort fields accumulated so far, in the order they were added.
  std::vector<SortField> fields;
  /// Forwarded to Bind() when resolving terms against the schema.
  bool case_sensitive{false};

  explicit Impl(const std::shared_ptr<Schema>& schema) : schema(schema) {}
};
103+
104+
// Defaulted here rather than in the header: Impl is only forward-declared
// there, and destroying std::unique_ptr<Impl> needs the complete type.
SortOrderBuilder::~SortOrderBuilder() = default;
105+
106+
SortOrderBuilder::SortOrderBuilder(const std::shared_ptr<Schema>& schema)
107+
: impl_(std::make_unique<Impl>(schema)) {}
108+
109+
std::unique_ptr<SortOrderBuilder> SortOrderBuilder::BuildFromSchema(
110+
const std::shared_ptr<Schema>& schema) {
111+
return std::unique_ptr<SortOrderBuilder>(new SortOrderBuilder(schema)); // NOLINT
112+
}
113+
114+
// Records the explicit order ID; it is checked later, when the order is built.
SortOrderBuilder& SortOrderBuilder::WithOrderId(int32_t sort_id) {
  impl_->sort_id.emplace(sort_id);
  return *this;
}
118+
119+
// Stores the flag that is later forwarded to Bind() for term resolution.
SortOrderBuilder& SortOrderBuilder::CaseSensitive(bool case_sensitive) {
  Impl& state = *impl_;
  state.case_sensitive = case_sensitive;
  return *this;
}
123+
124+
// Assembles the SortOrder from the accumulated state without checking it
// against the schema.
//
// - No fields: yields the shared unsorted order; an explicit ID other than
//   SortOrder::kUnsortedOrderId is rejected.
// - With fields: an explicit ID equal to SortOrder::kUnsortedOrderId is
//   rejected; a missing ID falls back to SortOrder::kInitialSortOrderId.
//
// The accumulated fields are moved out of the builder on success.
Result<std::shared_ptr<SortOrder>> SortOrderBuilder::BuildUncheckd() {
  const std::optional<int32_t>& requested_id = impl_->sort_id;
  const bool has_explicit_id = requested_id.has_value();
  const bool explicit_unsorted_id =
      has_explicit_id && *requested_id == SortOrder::kUnsortedOrderId;

  if (impl_->fields.empty()) {
    if (has_explicit_id && !explicit_unsorted_id) {
      return InvalidArgument("Unsorted order ID must be 0");
    }
    return SortOrder::Unsorted();
  }

  if (explicit_unsorted_id) {
    return InvalidArgument("Sort order ID 0 is reserved for unsorted order");
  }

  // ID 0 is reserved for the unsorted order, so the default starts at 1.
  const int32_t order_id = requested_id.value_or(SortOrder::kInitialSortOrderId);
  return std::make_shared<SortOrder>(order_id, std::move(impl_->fields));
}
140+
141+
// Builds the sort order, then validates it against the builder's schema.
// Any error from either step is propagated to the caller.
Result<std::shared_ptr<SortOrder>> SortOrderBuilder::Build() {
  ICEBERG_ASSIGN_OR_RAISE(auto built_order, BuildUncheckd());
  ICEBERG_RETURN_UNEXPECTED(CheckCompatibility(built_order, impl_->schema));
  return built_order;
}
146+
147+
// Appends a sort field described directly by source field ID and transform.
// Nothing is validated here; the schema check happens in Build().
SortOrderBuilder& SortOrderBuilder::AddSortField(
    int32_t source_id, const std::shared_ptr<Transform>& transform,
    SortDirection direction, NullOrder null_order) {
  std::vector<SortField>& pending_fields = impl_->fields;
  pending_fields.emplace_back(source_id, transform, direction, null_order);
  return *this;
}
153+
154+
// Resolves `term` against the builder's schema and appends the resulting sort
// field.
//
// - NamedReference: bound to the schema; the field sorts on the referenced
//   column with the identity transform.
// - UnboundTransform: bound to the schema; the field uses the bound
//   transform's source column and transform.
// - Anything else (including a null term): throws IcebergError.
//
// Binding uses the builder's case-sensitivity flag. A failed bind is reported
// through ICEBERG_CHECK (presumably by raising, since this function has no
// error return -- confirm against iceberg/util/macros.h).
SortOrderBuilder& SortOrderBuilder::AddSortField(const std::shared_ptr<Term>& term,
                                                 SortDirection direction,
                                                 NullOrder null_order) {
  // Dispatch on the dynamic type. std::dynamic_pointer_cast yields nullptr on a
  // type mismatch (and for a null `term`), so each branch is only entered for
  // the matching term kind.
  // NOTE(review): this used internal::checked_pointer_cast before. If that
  // helper degrades to a static cast in release builds (as Arrow's does), the
  // first branch would be taken for every non-null term -- confirm.
  if (auto named_ref = std::dynamic_pointer_cast<NamedReference>(term)) {
    auto bound_ref = named_ref->Bind(*impl_->schema, impl_->case_sensitive);
    ICEBERG_CHECK(bound_ref.has_value(), "Failed to bind named reference to schema.");
    int32_t source_id = bound_ref.value()->field().field_id();
    impl_->fields.emplace_back(source_id, Transform::Identity(), direction, null_order);
  } else if (auto unbound_transform =
                 std::dynamic_pointer_cast<UnboundTransform>(term)) {
    auto bound_transform = unbound_transform->Bind(*impl_->schema, impl_->case_sensitive);
    ICEBERG_CHECK(bound_transform.has_value(),
                  "Failed to bind unbound transform to schema.");
    int32_t source_id = bound_transform.value()->reference()->field().field_id();
    impl_->fields.emplace_back(source_id, bound_transform.value()->transform(), direction,
                               null_order);
  } else {
    throw IcebergError(std::format(
        "Invalid term: {}, expected either a named reference or an unbound transform",
        term ? term->ToString() : "null"));
  }

  return *this;
}
178+
179+
// Verifies that every field of `sort_order` can be applied to `schema`.
//
// For each sort field: the source column must exist in the schema, its type
// must be primitive, and the field's transform must be able to produce a
// result type for that source type. The first failure is returned; an empty
// Status means all fields passed.
Status SortOrderBuilder::CheckCompatibility(const std::shared_ptr<SortOrder>& sort_order,
                                            const std::shared_ptr<Schema>& schema) {
  for (const auto& sort_field : sort_order->fields()) {
    ICEBERG_ASSIGN_OR_RAISE(auto source_column,
                            schema->FindFieldById(sort_field.source_id()));
    if (!source_column.has_value()) {
      return ValidationError("Cannot find source column for sort field: {}", sort_field);
    }

    const auto& column_type = source_column->get().type();
    if (!column_type->is_primitive()) {
      return ValidationError("Cannot sort by non-primitive source field: {}",
                             *column_type);
    }

    // Only the success/failure of the lookup matters; the result type is unused.
    ICEBERG_RETURN_UNEXPECTED(sort_field.transform()->ResultType(column_type));
  }
  return {};
}
198+
83199
} // namespace iceberg

src/iceberg/sort_order.h

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,15 @@
2020
#pragma once
2121

2222
#include <cstdint>
23+
#include <memory>
2324
#include <span>
2425
#include <vector>
2526

27+
#include "iceberg/expression/expressions.h"
28+
#include "iceberg/expression/term.h"
2629
#include "iceberg/iceberg_export.h"
2730
#include "iceberg/sort_field.h"
31+
#include "iceberg/type_fwd.h"
2832
#include "iceberg/util/formattable.h"
2933

3034
namespace iceberg {
@@ -36,6 +40,7 @@ namespace iceberg {
3640
/// applied to the data.
3741
class ICEBERG_EXPORT SortOrder : public util::Formattable {
3842
public:
43+
static constexpr int32_t kUnsortedOrderId = 0;
3944
static constexpr int32_t kInitialSortOrderId = 1;
4045

4146
SortOrder(int32_t order_id, std::vector<SortField> fields);
@@ -77,4 +82,86 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable {
7782
std::vector<SortField> fields_;
7883
};
7984

85+
/// \brief A builder used to create valid SortOrder instances.
86+
class ICEBERG_EXPORT SortOrderBuilder {
87+
public:
88+
/// \brief Create a builder for a new SortOrder
89+
///
90+
/// \return A new SortOrderBuilder instance initialized with Schema
91+
static std::unique_ptr<SortOrderBuilder> BuildFromSchema(
92+
const std::shared_ptr<Schema>& schema);
93+
94+
/// \brief Add an expression term to the sort, ascending with the given null order.
95+
SortOrderBuilder& Asc(const std::shared_ptr<Term>& term, NullOrder null_order) {
96+
return AddSortField(term, SortDirection::kAscending, null_order);
97+
}
98+
99+
/// \brief Add an expression term to the sort, descending with the given null order.
100+
SortOrderBuilder& Desc(const std::shared_ptr<Term>& term, NullOrder null_order) {
101+
return AddSortField(term, SortDirection::kDescending, null_order);
102+
}
103+
104+
/// \brief Add a sort field to the sort order.
105+
SortOrderBuilder& SortBy(std::string name, SortDirection direction,
106+
NullOrder null_order) {
107+
return AddSortField(Expressions::Ref(std::move(name)), direction, null_order);
108+
}
109+
110+
/// \brief Add a sort field to the sort order.
111+
SortOrderBuilder& SortBy(const std::shared_ptr<Term>& term, SortDirection direction,
112+
NullOrder null_order) {
113+
return AddSortField(term, direction, null_order);
114+
}
115+
116+
/// \brief Set sort id to the sort order.
117+
SortOrderBuilder& WithOrderId(int32_t sort_id);
118+
119+
/// \brief Set case sensitive to the sort order.
120+
SortOrderBuilder& CaseSensitive(bool case_sensitive);
121+
122+
/// \brief Add a sort field to the sort order with the specified source field ID,
123+
/// transform, direction, and null order.
124+
///
125+
/// \param source_id The source field ID.
126+
/// \param transform The transform to apply to the field.
127+
/// \param direction The sort direction.
128+
/// \param null_order The null ordering behavior (e.g., nulls first or nulls last).
129+
SortOrderBuilder& AddSortField(int32_t source_id,
130+
const std::shared_ptr<Transform>& transform,
131+
SortDirection direction, NullOrder null_order);
132+
133+
/// \brief Destructor
134+
~SortOrderBuilder();
135+
136+
// Delete copy constructor and assignment operator
137+
SortOrderBuilder(const SortOrderBuilder&) = delete;
138+
SortOrderBuilder& operator=(const SortOrderBuilder&) = delete;
139+
140+
// Enable move constructor and assignment operator
141+
SortOrderBuilder(SortOrderBuilder&&) noexcept = default;
142+
SortOrderBuilder& operator=(SortOrderBuilder&&) noexcept = default;
143+
144+
/// \brief Builds a SortOrder instance.
145+
///
146+
/// \return A Result containing the constructed SortOrder or an error
147+
Result<std::shared_ptr<SortOrder>> Build();
148+
149+
private:
150+
/// \brief Private constructor for building from Schema
151+
explicit SortOrderBuilder(const std::shared_ptr<Schema>& schema);
152+
153+
SortOrderBuilder& AddSortField(const std::shared_ptr<Term>& term,
154+
SortDirection direction, NullOrder null_order);
155+
156+
/// \brief Builds an unchecked SortOrder instance.
157+
Result<std::shared_ptr<SortOrder>> BuildUncheckd();
158+
159+
static Status CheckCompatibility(const std::shared_ptr<SortOrder>& sort_order,
160+
const std::shared_ptr<Schema>& schema);
161+
162+
/// Internal state members
163+
struct Impl;
164+
std::unique_ptr<Impl> impl_;
165+
};
166+
80167
} // namespace iceberg

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ add_iceberg_test(schema_test
7373
partition_field_test.cc
7474
partition_spec_test.cc
7575
sort_field_test.cc
76+
sort_order_builder_test.cc
7677
sort_order_test.cc
7778
snapshot_test.cc
7879
schema_util_test.cc)

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ iceberg_tests = {
3838
'schema_util_test.cc',
3939
'snapshot_test.cc',
4040
'sort_field_test.cc',
41+
'sort_order_builder_test.cc',
4142
'sort_order_test.cc',
4243
'transform_test.cc',
4344
'type_test.cc',

src/iceberg/test/schema_field_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ TEST(SchemaFieldTest, Equality) {
6363
iceberg::SchemaField field1(1, "foo", iceberg::int32(), false);
6464
iceberg::SchemaField field2(2, "foo", iceberg::int32(), false);
6565
iceberg::SchemaField field3(1, "bar", iceberg::int32(), false);
66-
iceberg::SchemaField field4(1, "foo", std::make_shared<iceberg::LongType>(), false);
66+
iceberg::SchemaField field4(1, "foo", iceberg::int64(), false);
6767
iceberg::SchemaField field5(1, "foo", iceberg::int32(), true);
6868
iceberg::SchemaField field6(1, "foo", iceberg::int32(), false);
6969

src/iceberg/test/schema_test.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <gmock/gmock.h>
2626
#include <gtest/gtest.h>
2727

28-
#include "gtest/gtest.h"
2928
#include "iceberg/result.h"
3029
#include "iceberg/schema_field.h"
3130
#include "iceberg/test/matchers.h"

0 commit comments

Comments
 (0)