Skip to content

Commit a51ef4a

Browse files
committed
feat: add SortOrderBuilder
Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
1 parent 1c431b6 commit a51ef4a

11 files changed

Lines changed: 455 additions & 4 deletions

src/iceberg/result.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ enum class ErrorKind {
4848
kNotFound,
4949
kNotImplemented,
5050
kNotSupported,
51+
kValidationError,
5152
kUnknownError,
5253
};
5354

@@ -97,6 +98,7 @@ DEFINE_ERROR_FUNCTION(NotAllowed)
9798
DEFINE_ERROR_FUNCTION(NotFound)
9899
DEFINE_ERROR_FUNCTION(NotImplemented)
99100
DEFINE_ERROR_FUNCTION(NotSupported)
101+
DEFINE_ERROR_FUNCTION(ValidationError)
100102
DEFINE_ERROR_FUNCTION(UnknownError)
101103

102104
#undef DEFINE_ERROR_FUNCTION

src/iceberg/sort_order.cc

Lines changed: 120 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,18 @@
2020
#include "iceberg/sort_order.h"
2121

2222
#include <format>
23+
#include <memory>
24+
#include <optional>
2325
#include <ranges>
2426

27+
#include "iceberg/exception.h"
28+
#include "iceberg/expression/term.h"
29+
#include "iceberg/result.h"
30+
#include "iceberg/schema.h"
31+
#include "iceberg/sort_field.h"
32+
#include "iceberg/transform.h"
2533
#include "iceberg/util/formatter.h" // IWYU pragma: keep
34+
#include "iceberg/util/macros.h"
2635

2736
namespace iceberg {
2837

@@ -31,7 +40,7 @@ SortOrder::SortOrder(int32_t order_id, std::vector<SortField> fields)
3140

3241
const std::shared_ptr<SortOrder>& SortOrder::Unsorted() {
3342
static const std::shared_ptr<SortOrder> unsorted =
34-
std::make_shared<SortOrder>(/*order_id=*/0, std::vector<SortField>{});
43+
std::make_shared<SortOrder>(kUnsortedOrderId, std::vector<SortField>{});
3544
return unsorted;
3645
}
3746

@@ -80,4 +89,114 @@ bool SortOrder::Equals(const SortOrder& other) const {
8089
return order_id_ == other.order_id_ && fields_ == other.fields_;
8190
}
8291

92+
// SortOrderBuilder implementation
93+
94+
/// \brief Private state of SortOrderBuilder, kept out of the public header.
struct SortOrderBuilder::Impl {
  explicit Impl(const Schema* bound_schema) : schema{bound_schema} {}

  /// Schema used to bind terms and to validate the finished sort order.
  /// Held as a raw pointer: it is stored as given and dereferenced later.
  const Schema* schema;
  /// Order ID requested through WithOrderId(); empty until one is set.
  std::optional<int32_t> sort_id{};
  /// Sort fields, in the order they were added.
  std::vector<SortField> fields{};
  /// Forwarded to Bind() when terms are resolved against the schema.
  /// NOTE(review): defaults to case-insensitive binding — confirm this is the
  /// intended default for name resolution.
  bool case_sensitive = false;
};
102+
103+
SortOrderBuilder::~SortOrderBuilder() = default;
104+
105+
SortOrderBuilder::SortOrderBuilder(SortOrderBuilder&&) noexcept = default;
106+
107+
SortOrderBuilder& SortOrderBuilder::operator=(SortOrderBuilder&&) noexcept = default;
108+
109+
SortOrderBuilder::SortOrderBuilder(const Schema* schema)
110+
: impl_(std::make_unique<Impl>(schema)) {}
111+
112+
std::unique_ptr<SortOrderBuilder> SortOrderBuilder::BuildFromSchema(
113+
const Schema* schema) {
114+
return std::unique_ptr<SortOrderBuilder>(new SortOrderBuilder(schema)); // NOLINT
115+
}
116+
117+
/// Records the requested order ID. The value is only validated when the sort
/// order is built (0 is accepted for an order without fields, and rejected
/// otherwise).
SortOrderBuilder& SortOrderBuilder::WithOrderId(int32_t sort_id) {
  impl_->sort_id.emplace(sort_id);
  return *this;
}
121+
122+
/// Sets the flag passed to Bind() by the term-based AddSortField overload.
/// Only terms added after this call see the new value, because binding happens
/// at the moment a term is added.
SortOrderBuilder& SortOrderBuilder::CaseSensitive(bool case_sensitive) {
  Impl& state = *impl_;
  state.case_sensitive = case_sensitive;
  return *this;
}
126+
127+
/// Builds a SortOrder from the accumulated state without checking it against
/// the schema.
///
/// \return The unsorted order (ID 0) when no fields were added, otherwise an
/// order using the requested ID or SortOrder::kInitialSortOrderId when none was
/// set. Returns InvalidArgument when the requested ID conflicts with the
/// reserved unsorted ID: a non-zero ID without fields, or ID 0 with fields.
Result<std::unique_ptr<SortOrder>> SortOrderBuilder::BuildUncheckd() {
  const std::optional<int32_t>& requested_id = impl_->sort_id;

  if (impl_->fields.empty()) {
    if (requested_id.has_value() && *requested_id != SortOrder::kUnsortedOrderId) {
      return InvalidArgument("Unsorted order ID must be 0");
    }
    return std::make_unique<SortOrder>(SortOrder::kUnsortedOrderId,
                                       std::vector<SortField>{});
  }

  if (requested_id.has_value() && *requested_id == SortOrder::kUnsortedOrderId) {
    return InvalidArgument("Sort order ID 0 is reserved for unsorted order");
  }

  // Copy the fields instead of moving them out of the builder. Moving left
  // impl_->fields empty, so a Build() that failed validation lost every field
  // that had been added, and a second Build() silently produced the unsorted
  // order. The ID defaults to 1 because 0 is reserved for the unsorted order.
  return std::make_unique<SortOrder>(
      requested_id.value_or(SortOrder::kInitialSortOrderId), impl_->fields);
}
144+
145+
/// Builds the sort order and validates it against the builder's schema.
///
/// \return The validated SortOrder, or the first error reported either while
/// assembling the order or by CheckCompatibility().
Result<std::unique_ptr<SortOrder>> SortOrderBuilder::Build() {
  // Step 1: assemble the order from the accumulated state (ID rules only).
  ICEBERG_ASSIGN_OR_RAISE(auto candidate, BuildUncheckd());
  // Step 2: verify that every sort field is resolvable and sortable.
  ICEBERG_RETURN_UNEXPECTED(CheckCompatibility(candidate, impl_->schema));
  return candidate;
}
150+
151+
/// Appends a sort field described directly by source field ID and transform.
/// Nothing is checked here; the field is validated against the schema when
/// Build() runs CheckCompatibility().
SortOrderBuilder& SortOrderBuilder::AddSortField(
    int32_t source_id, const std::shared_ptr<Transform>& transform,
    SortDirection direction, NullOrder null_order) {
  auto& sort_fields = impl_->fields;
  sort_fields.emplace_back(source_id, transform, direction, null_order);
  return *this;
}
157+
158+
/// Binds an expression term to the builder's schema and appends the resulting
/// sort field.
///
/// A NamedReference becomes an identity-transform field on the referenced
/// column; an UnboundTransform contributes its own transform. Binding uses the
/// builder's current case-sensitivity setting.
///
/// Failures are reported immediately rather than deferred to Build(): a missing
/// schema or a failed bind is raised through ICEBERG_CHECK, and any other kind
/// of term (including a null pointer) throws IcebergError.
SortOrderBuilder& SortOrderBuilder::AddSortField(const std::shared_ptr<Term>& term,
                                                 SortDirection direction,
                                                 NullOrder null_order) {
  // The schema pointer is stored unchecked by the factory; dereferencing a null
  // pointer below would be undefined behaviour, so fail explicitly instead.
  ICEBERG_CHECK(impl_->schema != nullptr, "Cannot bind sort term without a schema.");
  const Schema& schema = *impl_->schema;
  const bool case_sensitive = impl_->case_sensitive;

  if (auto named_ref = std::dynamic_pointer_cast<NamedReference>(term)) {
    auto bound_ref = named_ref->Bind(schema, case_sensitive);
    ICEBERG_CHECK(bound_ref.has_value(), "Failed to bind named reference to schema.");
    const int32_t source_id = bound_ref.value()->field().field_id();
    // A plain column reference sorts on the column value itself.
    impl_->fields.emplace_back(source_id, Transform::Identity(), direction, null_order);
    return *this;
  }

  if (auto unbound_transform = std::dynamic_pointer_cast<UnboundTransform>(term)) {
    auto bound_transform = unbound_transform->Bind(schema, case_sensitive);
    ICEBERG_CHECK(bound_transform.has_value(),
                  "Failed to bind unbound transform to schema.");
    const int32_t source_id = bound_transform.value()->reference()->field().field_id();
    impl_->fields.emplace_back(source_id, bound_transform.value()->transform(), direction,
                               null_order);
    return *this;
  }

  throw IcebergError(std::format(
      "Invalid term: {}, expected either a named reference or an unbound transform",
      term ? term->ToString() : "null"));
}
181+
182+
/// Verifies that every field of \p sort_order can be applied to \p schema.
///
/// For each sort field the source column must exist in the schema, must have a
/// primitive type, and the field's transform must be able to produce a result
/// type for that source type.
///
/// \param sort_order The sort order to validate.
/// \param schema The schema the sort order is validated against.
/// \return An empty Status on success; InvalidArgument when either argument is
/// null; ValidationError when a source column is missing, is not primitive, or
/// a field carries no transform; otherwise the error returned by the schema
/// lookup or by Transform::ResultType.
Status SortOrderBuilder::CheckCompatibility(const std::unique_ptr<SortOrder>& sort_order,
                                            const Schema* schema) {
  // Both arguments are dereferenced below; reject null pointers up front
  // instead of invoking undefined behaviour.
  if (sort_order == nullptr) {
    return InvalidArgument("Cannot validate a null sort order");
  }
  if (schema == nullptr) {
    return InvalidArgument("Cannot validate a sort order without a schema");
  }

  for (const auto& field : sort_order->fields()) {
    ICEBERG_ASSIGN_OR_RAISE(auto schema_field, schema->FindFieldById(field.source_id()));
    if (!schema_field.has_value()) {
      return ValidationError("Cannot find source column for sort field: {}", field);
    }

    const auto& source_type = schema_field.value().get().type();
    if (!source_type->is_primitive()) {
      return ValidationError("Cannot sort by non-primitive source field: {}",
                             *source_type);
    }

    // AddSortField(source_id, transform, ...) stores the transform unchecked.
    // Only the source ID is reported, since formatting the whole field may
    // itself touch the missing transform.
    if (field.transform() == nullptr) {
      return ValidationError("Sort field with source ID {} has no transform",
                             field.source_id());
    }

    // Only the success of ResultType matters: an error means the transform
    // cannot be applied to this source type.
    ICEBERG_RETURN_UNEXPECTED(field.transform()->ResultType(source_type));
  }
  return {};
}
201+
83202
} // namespace iceberg

src/iceberg/sort_order.h

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,15 @@
2020
#pragma once
2121

2222
#include <cstdint>
23+
#include <memory>
2324
#include <span>
2425
#include <vector>
2526

27+
#include "iceberg/expression/expressions.h"
28+
#include "iceberg/expression/term.h"
2629
#include "iceberg/iceberg_export.h"
2730
#include "iceberg/sort_field.h"
31+
#include "iceberg/type_fwd.h"
2832
#include "iceberg/util/formattable.h"
2933

3034
namespace iceberg {
@@ -36,6 +40,7 @@ namespace iceberg {
3640
/// applied to the data.
3741
class ICEBERG_EXPORT SortOrder : public util::Formattable {
3842
public:
43+
static constexpr int32_t kUnsortedOrderId = 0;
3944
static constexpr int32_t kInitialSortOrderId = 1;
4045

4146
SortOrder(int32_t order_id, std::vector<SortField> fields);
@@ -77,4 +82,85 @@ class ICEBERG_EXPORT SortOrder : public util::Formattable {
7782
std::vector<SortField> fields_;
7883
};
7984

85+
/// \brief A builder used to create valid SortOrder instances.
86+
class ICEBERG_EXPORT SortOrderBuilder {
87+
public:
88+
/// \brief Create a builder for a new SortOrder
89+
///
90+
/// \return A new SortOrderBuilder instance initialized with Schema
91+
static std::unique_ptr<SortOrderBuilder> BuildFromSchema(const Schema* schema);
92+
93+
/// \brief Add an expression term to the sort, ascending with the given null order.
94+
SortOrderBuilder& Asc(const std::shared_ptr<Term>& term, NullOrder null_order) {
95+
return AddSortField(term, SortDirection::kAscending, null_order);
96+
}
97+
98+
/// \brief Add an expression term to the sort, descending with the given null order.
99+
SortOrderBuilder& Desc(const std::shared_ptr<Term>& term, NullOrder null_order) {
100+
return AddSortField(term, SortDirection::kDescending, null_order);
101+
}
102+
103+
/// \brief Add a sort field to the sort order.
104+
SortOrderBuilder& SortBy(std::string name, SortDirection direction,
105+
NullOrder null_order) {
106+
return AddSortField(Expressions::Ref(std::move(name)), direction, null_order);
107+
}
108+
109+
/// \brief Add a sort field to the sort order.
110+
SortOrderBuilder& SortBy(const std::shared_ptr<Term>& term, SortDirection direction,
111+
NullOrder null_order) {
112+
return AddSortField(term, direction, null_order);
113+
}
114+
115+
/// \brief Set sort id to the sort order.
116+
SortOrderBuilder& WithOrderId(int32_t sort_id);
117+
118+
/// \brief Set case sensitive to the sort order.
119+
SortOrderBuilder& CaseSensitive(bool case_sensitive);
120+
121+
/// \brief Add a sort field to the sort order with the specified source field ID,
122+
/// transform, direction, and null order.
123+
///
124+
/// \param source_id The source field ID.
125+
/// \param transform The transform to apply to the field.
126+
/// \param direction The sort direction.
127+
/// \param null_order The null ordering behavior (e.g., nulls first or nulls last).
128+
SortOrderBuilder& AddSortField(int32_t source_id,
129+
const std::shared_ptr<Transform>& transform,
130+
SortDirection direction, NullOrder null_order);
131+
132+
/// \brief Builds a SortOrder instance.
133+
///
134+
/// \return A Result containing the constructed SortOrder or an error
135+
Result<std::unique_ptr<SortOrder>> Build();
136+
137+
/// \brief Destructor
138+
~SortOrderBuilder();
139+
140+
// Delete copy operations (use BuildFromSchema to create a new builder)
141+
SortOrderBuilder(const SortOrderBuilder&) = delete;
142+
SortOrderBuilder& operator=(const SortOrderBuilder&) = delete;
143+
144+
// Enable move operations
145+
SortOrderBuilder(SortOrderBuilder&&) noexcept;
146+
SortOrderBuilder& operator=(SortOrderBuilder&&) noexcept;
147+
148+
private:
149+
/// \brief Private constructor for building from Schema
150+
explicit SortOrderBuilder(const Schema* schema);
151+
152+
SortOrderBuilder& AddSortField(const std::shared_ptr<Term>& term,
153+
SortDirection direction, NullOrder null_order);
154+
155+
/// \brief Builds an unchecked SortOrder instance.
156+
Result<std::unique_ptr<SortOrder>> BuildUncheckd();
157+
158+
static Status CheckCompatibility(const std::unique_ptr<SortOrder>& sort_order,
159+
const Schema* schema);
160+
161+
/// Internal state members
162+
struct Impl;
163+
std::unique_ptr<Impl> impl_;
164+
};
165+
80166
} // namespace iceberg

src/iceberg/test/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ add_iceberg_test(schema_test
7373
partition_field_test.cc
7474
partition_spec_test.cc
7575
sort_field_test.cc
76+
sort_order_builder_test.cc
7677
sort_order_test.cc
7778
snapshot_test.cc
7879
schema_util_test.cc)

src/iceberg/test/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ iceberg_tests = {
3838
'schema_util_test.cc',
3939
'snapshot_test.cc',
4040
'sort_field_test.cc',
41+
'sort_order_builder_test.cc',
4142
'sort_order_test.cc',
4243
'transform_test.cc',
4344
'type_test.cc',

src/iceberg/test/schema_field_test.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ TEST(SchemaFieldTest, Equality) {
6363
iceberg::SchemaField field1(1, "foo", iceberg::int32(), false);
6464
iceberg::SchemaField field2(2, "foo", iceberg::int32(), false);
6565
iceberg::SchemaField field3(1, "bar", iceberg::int32(), false);
66-
iceberg::SchemaField field4(1, "foo", std::make_shared<iceberg::LongType>(), false);
66+
iceberg::SchemaField field4(1, "foo", iceberg::int64(), false);
6767
iceberg::SchemaField field5(1, "foo", iceberg::int32(), true);
6868
iceberg::SchemaField field6(1, "foo", iceberg::int32(), false);
6969

src/iceberg/test/schema_test.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
#include <gmock/gmock.h>
2626
#include <gtest/gtest.h>
2727

28-
#include "gtest/gtest.h"
2928
#include "iceberg/result.h"
3029
#include "iceberg/schema_field.h"
3130
#include "iceberg/test/matchers.h"

0 commit comments

Comments
 (0)